/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/epoch.h>
#include <sys/gtaskqueue.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#ifdef EPOCH_TRACE
#include <machine/stdarg.h>
#include <sys/stack.h>
#include <sys/tree.h>
#endif
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/uma.h>

#include <machine/stack.h>

#include <ck_epoch.h>

#ifdef __amd64__
#define EPOCH_ALIGN CACHE_LINE_SIZE*2
#else
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif

TAILQ_HEAD (epoch_tdlist, epoch_tracker);
typedef struct epoch_record {
	ck_epoch_record_t er_record;
	struct epoch_context er_drain_ctx;
	struct epoch *er_parent;
	volatile struct epoch_tdlist er_tdlist;
	volatile uint32_t er_gen;
	uint32_t er_cpuid;
#ifdef INVARIANTS
	/* Used to verify record ownership for non-preemptible epochs. */
	struct thread *er_td;
#endif
} __aligned(EPOCH_ALIGN) *epoch_record_t;

struct epoch {
	struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
	epoch_record_t e_pcpu_record;
	int e_in_use;
	int e_flags;
	struct sx e_drain_sx;
	struct mtx e_drain_mtx;
	volatile int e_drain_count;
	const char *e_name;
};

/* arbitrary --- needs benchmarking */
#define MAX_ADAPTIVE_SPIN 100
#define MAX_EPOCHS 64

CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "epoch information");
SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "epoch stats");

/* Stats. */
static counter_u64_t block_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW,
    &block_count, "# of times a thread was in an epoch when epoch_wait was called");
static counter_u64_t migrate_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW,
    &migrate_count, "# of times thread was migrated to another CPU in epoch_wait");
static counter_u64_t turnstile_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW,
    &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait");
static counter_u64_t switch_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
    &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
static counter_u64_t epoch_call_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
    &epoch_call_count, "# of times a callback was deferred");
static counter_u64_t epoch_call_task_count;

SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
    &epoch_call_task_count, "# of times a callback task was run");

TAILQ_HEAD (threadlist, thread);

CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
    ck_epoch_entry_container)

static struct epoch epoch_array[MAX_EPOCHS];

DPCPU_DEFINE(struct grouptask, epoch_cb_task);
DPCPU_DEFINE(int, epoch_cb_count);

static __read_mostly int inited;
__read_mostly epoch_t global_epoch;
__read_mostly epoch_t global_epoch_preempt;

static void epoch_call_task(void *context __unused);
static uma_zone_t pcpu_zone_record;

static struct sx epoch_sx;

#define	EPOCH_LOCK() sx_xlock(&epoch_sx)
#define	EPOCH_UNLOCK() sx_xunlock(&epoch_sx)

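/* Return the calling CPU's private record for the given epoch. */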
static epoch_record_t
epoch_currecord(epoch_t epoch)
{

	return (zpcpu_get(epoch->e_pcpu_record));
}

#ifdef EPOCH_TRACE
struct stackentry {
	RB_ENTRY(stackentry) se_node;
	struct stack se_stack;
};

static int
stackentry_compare(struct stackentry *a, struct stackentry *b)
{

	if (a->se_stack.depth > b->se_stack.depth)
		return (1);
	if (a->se_stack.depth < b->se_stack.depth)
		return (-1);
	for (int i = 0; i < a->se_stack.depth; i++) {
		if (a->se_stack.pcs[i] > b->se_stack.pcs[i])
			return (1);
		if (a->se_stack.pcs[i] < b->se_stack.pcs[i])
			return (-1);
	}

	return (0);
}

RB_HEAD(stacktree, stackentry) epoch_stacks = RB_INITIALIZER(&epoch_stacks);
RB_GENERATE_STATIC(stacktree, stackentry, se_node, stackentry_compare);

static struct mtx epoch_stacks_lock;
MTX_SYSINIT(epochstacks, &epoch_stacks_lock, "epoch_stacks", MTX_DEF);

static bool epoch_trace_stack_print = true;
SYSCTL_BOOL(_kern_epoch, OID_AUTO, trace_stack_print, CTLFLAG_RWTUN,
    &epoch_trace_stack_print, 0, "Print stack traces on epoch reports");

static void epoch_trace_report(const char *fmt, ...) __printflike(1, 2);
static inline void
epoch_trace_report(const char *fmt, ...)
{
	va_list ap;
	struct stackentry se, *new;

	stack_save(&se.se_stack);

	/* Tree is never reduced - go lockless. */
	if (RB_FIND(stacktree, &epoch_stacks, &se) != NULL)
		return;

	new = malloc(sizeof(*new), M_STACK, M_NOWAIT);
	if (new != NULL) {
		bcopy(&se.se_stack, &new->se_stack, sizeof(struct stack));

		mtx_lock(&epoch_stacks_lock);
		new = RB_INSERT(stacktree, &epoch_stacks, new);
		mtx_unlock(&epoch_stacks_lock);
		if (new != NULL)
			free(new, M_STACK);
	}

	va_start(ap, fmt);
	(void)vprintf(fmt, ap);
	va_end(ap);
	if (epoch_trace_stack_print)
		stack_print_ddb(&se.se_stack);
}

static inline void
epoch_trace_enter(struct thread *td, epoch_t epoch, epoch_tracker_t et,
    const char *file, int line)
{
	epoch_tracker_t iet;

	SLIST_FOREACH(iet, &td->td_epochs, et_tlink) {
		if (iet->et_epoch != epoch)
			continue;
		epoch_trace_report("Recursively entering epoch %s "
		    "at %s:%d, previously entered at %s:%d\n",
		    epoch->e_name, file, line,
		    iet->et_file, iet->et_line);
	}
	et->et_epoch = epoch;
	et->et_file = file;
	et->et_line = line;
	et->et_flags = 0;
	SLIST_INSERT_HEAD(&td->td_epochs, et, et_tlink);
}

static inline void
epoch_trace_exit(struct thread *td, epoch_t epoch, epoch_tracker_t et,
    const char *file, int line)
{

	if (SLIST_FIRST(&td->td_epochs) != et) {
		epoch_trace_report("Exiting epoch %s in a not nested order "
		    "at %s:%d. Most recently entered %s at %s:%d\n",
		    epoch->e_name,
		    file, line,
		    SLIST_FIRST(&td->td_epochs)->et_epoch->e_name,
		    SLIST_FIRST(&td->td_epochs)->et_file,
		    SLIST_FIRST(&td->td_epochs)->et_line);
		/* This will panic if et is not anywhere on td_epochs. */
		SLIST_REMOVE(&td->td_epochs, et, epoch_tracker, et_tlink);
	} else
		SLIST_REMOVE_HEAD(&td->td_epochs, et_tlink);
	if (et->et_flags & ET_REPORT_EXIT)
		printf("Td %p exiting epoch %s at %s:%d\n", td, epoch->e_name,
		    file, line);
}

/* Used by assertions that check thread state before going to sleep. */
void
epoch_trace_list(struct thread *td)
{
	epoch_tracker_t iet;

	SLIST_FOREACH(iet, &td->td_epochs, et_tlink)
		printf("Epoch %s entered at %s:%d\n", iet->et_epoch->e_name,
		    iet->et_file, iet->et_line);
}

void
epoch_where_report(epoch_t epoch)
{
	epoch_record_t er;
	struct epoch_tracker *tdwait;

	MPASS(epoch != NULL);
	MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0);
	MPASS(!THREAD_CAN_SLEEP());
	critical_enter();
	er = epoch_currecord(epoch);
	TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
		if (tdwait->et_td == curthread)
			break;
	critical_exit();
	if (tdwait != NULL) {
		tdwait->et_flags |= ET_REPORT_EXIT;
		printf("Td %p entered epoch %s at %s:%d\n", curthread,
		    epoch->e_name, tdwait->et_file, tdwait->et_line);
	}
}
#endif /* EPOCH_TRACE */

static void
epoch_init(void *arg __unused)
{
	int cpu;

	block_count = counter_u64_alloc(M_WAITOK);
	migrate_count = counter_u64_alloc(M_WAITOK);
	turnstile_count = counter_u64_alloc(M_WAITOK);
	switch_count = counter_u64_alloc(M_WAITOK);
	epoch_call_count = counter_u64_alloc(M_WAITOK);
	epoch_call_task_count = counter_u64_alloc(M_WAITOK);

	pcpu_zone_record = uma_zcreate("epoch_record pcpu",
	    sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_PCPU);
	CPU_FOREACH(cpu) {
		GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0,
		    epoch_call_task, NULL);
		taskqgroup_attach_cpu(qgroup_softirq,
		    DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, NULL, NULL,
		    "epoch call task");
	}
#ifdef EPOCH_TRACE
	SLIST_INIT(&thread0.td_epochs);
#endif
	sx_init(&epoch_sx, "epoch-sx");
	inited = 1;
	global_epoch = epoch_alloc("Global", 0);
	global_epoch_preempt = epoch_alloc("Global preemptible", EPOCH_PREEMPT);
}
SYSINIT(epoch, SI_SUB_EPOCH, SI_ORDER_FIRST, epoch_init, NULL);

#if !defined(EARLY_AP_STARTUP)
static void
epoch_init_smp(void *dummy __unused)
{
	inited = 2;
}
SYSINIT(epoch_smp, SI_SUB_SMP + 1, SI_ORDER_FIRST, epoch_init_smp, NULL);
#endif

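/*
 * Initialize the per-CPU records of a newly allocated epoch: each record
 * is registered with the underlying ck_epoch and gets an empty tracker
 * list.
 */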
static void
epoch_ctor(epoch_t epoch)
{
	epoch_record_t er;
	int cpu;

	epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
	CPU_FOREACH(cpu) {
		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
		bzero(er, sizeof(*er));
		ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
		TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
		er->er_cpuid = cpu;
		er->er_parent = epoch;
	}
}

static void
epoch_adjust_prio(struct thread *td, u_char prio)
{

	thread_lock(td);
	sched_prio(td, prio);
	thread_unlock(td);
}

epoch_t
epoch_alloc(const char *name, int flags)
{
	epoch_t epoch;
	int i;

	MPASS(name != NULL);

	if (__predict_false(!inited))
		panic("%s called too early in boot", __func__);

	EPOCH_LOCK();

	/*
	 * Find a free index in the epoch array. If no free index is
	 * found, try to use the index after the last one.
	 */
	for (i = 0;; i++) {
		/*
		 * If too many epochs are currently allocated,
		 * return NULL.
		 */
		if (i == MAX_EPOCHS) {
			epoch = NULL;
			goto done;
		}
		if (epoch_array[i].e_in_use == 0)
			break;
	}

	epoch = epoch_array + i;
	ck_epoch_init(&epoch->e_epoch);
	epoch_ctor(epoch);
	epoch->e_flags = flags;
	epoch->e_name = name;
	sx_init(&epoch->e_drain_sx, "epoch-drain-sx");
	mtx_init(&epoch->e_drain_mtx, "epoch-drain-mtx", NULL, MTX_DEF);

	/*
	 * Set e_in_use last, because when this field is set the
	 * epoch_call_task() function will start scanning this epoch
	 * structure.
	 */
	atomic_store_rel_int(&epoch->e_in_use, 1);
done:
	EPOCH_UNLOCK();
	return (epoch);
}

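/*
 * Typical consumer usage (a sketch only; real consumers usually allocate
 * their own epoch rather than using global_epoch_preempt):
 *
 *	struct epoch_tracker et;
 *
 *	epoch_enter_preempt(global_epoch_preempt, &et);
 *	<read-only traversal of an epoch-protected structure>
 *	epoch_exit_preempt(global_epoch_preempt, &et);
 *
 * Writers unlink objects under their own lock and then call epoch_call()
 * or epoch_wait_preempt() before freeing them.
 */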
void
epoch_free(epoch_t epoch)
{
#ifdef INVARIANTS
	int cpu;
#endif

	EPOCH_LOCK();

	MPASS(epoch->e_in_use != 0);

	epoch_drain_callbacks(epoch);

	atomic_store_rel_int(&epoch->e_in_use, 0);
	/*
	 * Make sure the epoch_call_task() function sees e_in_use equal
	 * to zero, by calling epoch_wait() on the global_epoch.
	 */
	epoch_wait(global_epoch);
#ifdef INVARIANTS
	CPU_FOREACH(cpu) {
		epoch_record_t er;

		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);

		/*
		 * Sanity check: none of the records should be in use anymore.
		 * We drained callbacks above and freeing the pcpu records is
		 * imminent.
		 */
		MPASS(er->er_td == NULL);
		MPASS(TAILQ_EMPTY(&er->er_tdlist));
	}
#endif
	uma_zfree_pcpu(pcpu_zone_record, epoch->e_pcpu_record);
	mtx_destroy(&epoch->e_drain_mtx);
	sx_destroy(&epoch->e_drain_sx);
	memset(epoch, 0, sizeof(*epoch));

	EPOCH_UNLOCK();
}

#define INIT_CHECK(epoch)					\
	do {							\
		if (__predict_false((epoch) == NULL))		\
			return;					\
	} while (0)

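/*
 * Enter a preemptible epoch section.  The tracker is linked onto the
 * current CPU's record and the thread is pinned to that CPU; preemption
 * remains enabled, but sleeping is forbidden for the duration of the
 * section (THREAD_NO_SLEEPING()).
 */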
void
_epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
{
	struct epoch_record *er;
	struct thread *td;

	MPASS(cold || epoch != NULL);
	td = curthread;
	MPASS(kstack_contains(td, (vm_offset_t)et, sizeof(*et)));

	INIT_CHECK(epoch);
	MPASS(epoch->e_flags & EPOCH_PREEMPT);

#ifdef EPOCH_TRACE
	epoch_trace_enter(td, epoch, et, file, line);
#endif
	et->et_td = td;
	THREAD_NO_SLEEPING();
	critical_enter();
	sched_pin();
	et->et_old_priority = td->td_priority;
	er = epoch_currecord(epoch);
	/* Record-level tracking is reserved for non-preemptible epochs. */
	MPASS(er->er_td == NULL);
	TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
	ck_epoch_begin(&er->er_record, &et->et_section);
	critical_exit();
}

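/*
 * Enter a non-preemptible epoch section.  The calling thread remains in a
 * critical section until the matching epoch_exit(), so no tracker is
 * required.
 */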
void
epoch_enter(epoch_t epoch)
{
	epoch_record_t er;

	MPASS(cold || epoch != NULL);
	INIT_CHECK(epoch);
	critical_enter();
	er = epoch_currecord(epoch);
#ifdef INVARIANTS
	if (er->er_record.active == 0) {
		MPASS(er->er_td == NULL);
		er->er_td = curthread;
	} else {
		/* We've recursed, just make sure our accounting isn't wrong. */
		MPASS(er->er_td == curthread);
	}
#endif
	ck_epoch_begin(&er->er_record, NULL);
}

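/*
 * Exit a preemptible epoch section: unlink the tracker, restore the
 * thread's priority if epoch_wait_preempt() boosted it, and allow
 * sleeping and migration again.
 */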
void
_epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
{
	struct epoch_record *er;
	struct thread *td;

	INIT_CHECK(epoch);
	td = curthread;
	critical_enter();
	sched_unpin();
	THREAD_SLEEPING_OK();
	er = epoch_currecord(epoch);
	MPASS(epoch->e_flags & EPOCH_PREEMPT);
	MPASS(et != NULL);
	MPASS(et->et_td == td);
#ifdef INVARIANTS
	et->et_td = (void*)0xDEADBEEF;
	/* Record-level tracking is reserved for non-preemptible epochs. */
	MPASS(er->er_td == NULL);
#endif
	ck_epoch_end(&er->er_record, &et->et_section);
	TAILQ_REMOVE(&er->er_tdlist, et, et_link);
	er->er_gen++;
	if (__predict_false(et->et_old_priority != td->td_priority))
		epoch_adjust_prio(td, et->et_old_priority);
	critical_exit();
#ifdef EPOCH_TRACE
	epoch_trace_exit(td, epoch, et, file, line);
#endif
}

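/* Exit a non-preemptible epoch section and leave the critical section. */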
void
epoch_exit(epoch_t epoch)
{
	epoch_record_t er;

	INIT_CHECK(epoch);
	er = epoch_currecord(epoch);
	ck_epoch_end(&er->er_record, NULL);
#ifdef INVARIANTS
	MPASS(er->er_td == curthread);
	if (er->er_record.active == 0)
		er->er_td = NULL;
#endif
	critical_exit();
}

/*
 * epoch_block_handler_preempt() is a callback from the CK code when another
 * thread is currently in an epoch section.
 */
static void
epoch_block_handler_preempt(struct ck_epoch *global __unused,
    ck_epoch_record_t *cr, void *arg __unused)
{
	epoch_record_t record;
	struct thread *td, *owner, *curwaittd;
	struct epoch_tracker *tdwait;
	struct turnstile *ts;
	struct lock_object *lock;
	int spincount, gen;
	int locksheld __unused;

	record = __containerof(cr, struct epoch_record, er_record);
	td = curthread;
	locksheld = td->td_locks;
	spincount = 0;
	counter_u64_add(block_count, 1);
	/*
	 * We lost a race and there are no longer any threads
	 * on the CPU in an epoch section.
	 */
	if (TAILQ_EMPTY(&record->er_tdlist))
		return;

	if (record->er_cpuid != curcpu) {
		/*
		 * If the head of the list is running, we can wait for it
		 * to remove itself from the list and thus save us the
		 * overhead of a migration.
		 */
		gen = record->er_gen;
		thread_unlock(td);
		/*
		 * We can't actually check if the waiting thread is running
		 * so we simply poll for it to exit before giving up and
		 * migrating.
		 */
		do {
			cpu_spinwait();
		} while (!TAILQ_EMPTY(&record->er_tdlist) &&
		    gen == record->er_gen &&
		    spincount++ < MAX_ADAPTIVE_SPIN);
		thread_lock(td);
		/*
		 * If the generation has changed we can poll again,
		 * otherwise we need to migrate.
		 */
		if (gen != record->er_gen)
			return;
		/*
		 * Being on the same CPU as that of the record on which
		 * we need to wait allows us access to the thread
		 * list associated with that CPU. We can then examine the
		 * oldest thread in the queue and wait on its turnstile
		 * until it resumes and so on until a grace period
		 * elapses.
		 */
		counter_u64_add(migrate_count, 1);
		sched_bind(td, record->er_cpuid);
		/*
		 * At this point we need to return to the ck code
		 * to scan to see if a grace period has elapsed.
		 * We can't move on to check the thread list, because
		 * in the meantime new threads may have arrived that
		 * in fact belong to a different epoch.
		 */
		return;
	}
	/*
	 * Try to find a thread in an epoch section on this CPU
	 * waiting on a turnstile. Otherwise find the lowest
	 * priority thread (highest prio value) and drop our priority
	 * to match to allow it to run.
	 */
	TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
		/*
		 * Propagate our priority to any other waiters to prevent us
		 * from starving them. They will have their original priority
		 * restored on exit from epoch_wait().
		 */
		curwaittd = tdwait->et_td;
		if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
			critical_enter();
			thread_unlock(td);
			thread_lock(curwaittd);
			sched_prio(curwaittd, td->td_priority);
			thread_unlock(curwaittd);
			thread_lock(td);
			critical_exit();
		}
		if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
		    ((ts = curwaittd->td_blocked) != NULL)) {
			/*
			 * We unlock td to allow turnstile_wait() to reacquire
			 * the thread lock. Before unlocking it we enter a
			 * critical section to prevent preemption after we
			 * reenable interrupts by dropping the thread lock, in
			 * order to prevent curwaittd from getting to run.
			 */
			critical_enter();
			thread_unlock(td);

			if (turnstile_lock(ts, &lock, &owner)) {
				if (ts == curwaittd->td_blocked) {
					MPASS(TD_IS_INHIBITED(curwaittd) &&
					    TD_ON_LOCK(curwaittd));
					critical_exit();
					turnstile_wait(ts, owner,
					    curwaittd->td_tsqueue);
					counter_u64_add(turnstile_count, 1);
					thread_lock(td);
					return;
				}
				turnstile_unlock(ts, lock);
			}
			thread_lock(td);
			critical_exit();
			KASSERT(td->td_locks == locksheld,
			    ("%d extra locks held", td->td_locks - locksheld));
		}
	}
	/*
	 * We didn't find any threads actually blocked on a lock
	 * so we have nothing to do except context switch away.
	 */
	counter_u64_add(switch_count, 1);
	mi_switch(SW_VOL | SWT_RELINQUISH);
	/*
	 * It is important that the thread lock is dropped while yielding
	 * to allow other threads to acquire the lock pointed to by
	 * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the
	 * thread lock before returning. Otherwise a deadlock-like
	 * situation might arise.
	 */
	thread_lock(td);
}

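/*
 * Wait until every thread that entered a preemptible epoch section before
 * this call has exited it.  The caller may sleep; the waiting thread binds
 * to each CPU with lagging readers and either lends them its priority,
 * waits on their turnstile, or yields until a grace period has elapsed.
 */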
void
epoch_wait_preempt(epoch_t epoch)
{
	struct thread *td;
	int was_bound;
	int old_cpu;
	int old_pinned;
	u_char old_prio;
	int locks __unused;

	MPASS(cold || epoch != NULL);
	INIT_CHECK(epoch);
	td = curthread;
#ifdef INVARIANTS
	locks = curthread->td_locks;
	MPASS(epoch->e_flags & EPOCH_PREEMPT);
	if ((epoch->e_flags & EPOCH_LOCKED) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
		    "epoch_wait() can be long running");
	KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle "
	    "of an epoch section of the same epoch"));
#endif
	DROP_GIANT();
	thread_lock(td);

	old_cpu = PCPU_GET(cpuid);
	old_pinned = td->td_pinned;
	old_prio = td->td_priority;
	was_bound = sched_is_bound(td);
	sched_unbind(td);
	td->td_pinned = 0;
	sched_bind(td, old_cpu);

	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
	    NULL);

	/* restore CPU binding, if any */
	if (was_bound != 0) {
		sched_bind(td, old_cpu);
	} else {
		/* get thread back to initial CPU, if any */
		if (old_pinned != 0)
			sched_bind(td, old_cpu);
		sched_unbind(td);
	}
	/* restore pinned after bind */
	td->td_pinned = old_pinned;

	/* restore thread priority */
	sched_prio(td, old_prio);
	thread_unlock(td);
	PICKUP_GIANT();
	KASSERT(td->td_locks == locks,
	    ("%d residual locks held", td->td_locks - locks));
}

static void
epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused,
    void *arg __unused)
{
	cpu_spinwait();
}

void
epoch_wait(epoch_t epoch)
{

	MPASS(cold || epoch != NULL);
	INIT_CHECK(epoch);
	MPASS(epoch->e_flags == 0);
	critical_enter();
	ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL);
	critical_exit();
}

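/*
 * Defer a callback until a grace period has elapsed for the given epoch.
 * The callback later runs from the per-CPU epoch_cb_task; if called too
 * early in boot, the callback is invoked immediately instead.
 */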
void
epoch_call(epoch_t epoch, epoch_callback_t callback, epoch_context_t ctx)
{
	epoch_record_t er;
	ck_epoch_entry_t *cb;

	cb = (void *)ctx;

	MPASS(callback);
	/* too early in boot to have epoch set up */
	if (__predict_false(epoch == NULL))
		goto boottime;
#if !defined(EARLY_AP_STARTUP)
	if (__predict_false(inited < 2))
		goto boottime;
#endif

	critical_enter();
	*DPCPU_PTR(epoch_cb_count) += 1;
	er = epoch_currecord(epoch);
	ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
	critical_exit();
	return;
boottime:
	callback(ctx);
}

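/*
 * Per-CPU grouptask handler: poll every in-use epoch for deferred
 * callbacks whose grace period has expired on this CPU and run them.
 */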
static void
epoch_call_task(void *arg __unused)
{
	ck_stack_entry_t *cursor, *head, *next;
	ck_epoch_record_t *record;
	epoch_record_t er;
	epoch_t epoch;
	ck_stack_t cb_stack;
	int i, npending, total;

	ck_stack_init(&cb_stack);
	critical_enter();
	epoch_enter(global_epoch);
	for (total = i = 0; i != MAX_EPOCHS; i++) {
		epoch = epoch_array + i;
		if (__predict_false(
		    atomic_load_acq_int(&epoch->e_in_use) == 0))
			continue;
		er = epoch_currecord(epoch);
		record = &er->er_record;
		if ((npending = record->n_pending) == 0)
			continue;
		ck_epoch_poll_deferred(record, &cb_stack);
		total += npending - record->n_pending;
	}
	epoch_exit(global_epoch);
	*DPCPU_PTR(epoch_cb_count) -= total;
	critical_exit();

	counter_u64_add(epoch_call_count, total);
	counter_u64_add(epoch_call_task_count, 1);

	head = ck_stack_batch_pop_npsc(&cb_stack);
	for (cursor = head; cursor != NULL; cursor = next) {
		struct ck_epoch_entry *entry =
		    ck_epoch_entry_container(cursor);

		next = CK_STACK_NEXT(cursor);
		entry->function(entry);
	}
}

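/*
 * Report whether the current thread is inside a section of the given
 * preemptible epoch, optionally dumping the per-CPU tracker list when it
 * is not (INVARIANTS only).
 */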
static int
in_epoch_verbose_preempt(epoch_t epoch, int dump_onfail)
{
	epoch_record_t er;
	struct epoch_tracker *tdwait;
	struct thread *td;

	MPASS(epoch != NULL);
	MPASS((epoch->e_flags & EPOCH_PREEMPT) != 0);
	td = curthread;
	if (THREAD_CAN_SLEEP())
		return (0);
	critical_enter();
	er = epoch_currecord(epoch);
	TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
		if (tdwait->et_td == td) {
			critical_exit();
			return (1);
		}
#ifdef INVARIANTS
	if (dump_onfail) {
		MPASS(td->td_pinned);
		printf("cpu: %d id: %d\n", curcpu, td->td_tid);
		TAILQ_FOREACH(tdwait, &er->er_tdlist, et_link)
			printf("td_tid: %d ", tdwait->et_td->td_tid);
		printf("\n");
	}
#endif
	critical_exit();
	return (0);
}

#ifdef INVARIANTS
static void
epoch_assert_nocpu(epoch_t epoch, struct thread *td)
{
	epoch_record_t er;
	int cpu;
	bool crit;

	crit = td->td_critnest > 0;

	/* Check for a critical section mishap. */
	CPU_FOREACH(cpu) {
		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
		KASSERT(er->er_td != td,
		    ("%s critical section in epoch '%s', from cpu %d",
		    (crit ? "exited" : "re-entered"), epoch->e_name, cpu));
	}
}
#else
#define	epoch_assert_nocpu(e, td) do {} while (0)
#endif

int
in_epoch_verbose(epoch_t epoch, int dump_onfail)
{
	epoch_record_t er;
	struct thread *td;

	if (__predict_false((epoch) == NULL))
		return (0);
	if ((epoch->e_flags & EPOCH_PREEMPT) != 0)
		return (in_epoch_verbose_preempt(epoch, dump_onfail));

	/*
	 * The thread being in a critical section is a necessary
	 * condition to be correctly inside a non-preemptible epoch,
	 * so it's definitely not in this epoch.
	 */
	td = curthread;
	if (td->td_critnest == 0) {
		epoch_assert_nocpu(epoch, td);
		return (0);
	}

	/*
	 * The current cpu is in a critical section, so the epoch record will be
	 * stable for the rest of this function. Knowing that the record is not
	 * active is sufficient for knowing whether we're in this epoch or not,
	 * since it's a pcpu record.
	 */
	er = epoch_currecord(epoch);
	if (er->er_record.active == 0) {
		epoch_assert_nocpu(epoch, td);
		return (0);
	}

	MPASS(er->er_td == td);
	return (1);
}

int
in_epoch(epoch_t epoch)
{
	return (in_epoch_verbose(epoch, 0));
}

static void
epoch_drain_cb(struct epoch_context *ctx)
{
	struct epoch *epoch =
	    __containerof(ctx, struct epoch_record, er_drain_ctx)->er_parent;

	if (atomic_fetchadd_int(&epoch->e_drain_count, -1) == 1) {
		mtx_lock(&epoch->e_drain_mtx);
		wakeup(epoch);
		mtx_unlock(&epoch->e_drain_mtx);
	}
}

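/*
 * Wait until every callback deferred on any CPU for this epoch has run.
 * The calling thread binds to each CPU in turn to schedule a drain
 * callback there, then sleeps until the last one has fired.
 */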
void
epoch_drain_callbacks(epoch_t epoch)
{
	epoch_record_t er;
	struct thread *td;
	int was_bound;
	int old_pinned;
	int old_cpu;
	int cpu;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "epoch_drain_callbacks() may sleep!");

	/* too early in boot to have epoch set up */
	if (__predict_false(epoch == NULL))
		return;
#if !defined(EARLY_AP_STARTUP)
	if (__predict_false(inited < 2))
		return;
#endif
	DROP_GIANT();

	sx_xlock(&epoch->e_drain_sx);
	mtx_lock(&epoch->e_drain_mtx);

	td = curthread;
	thread_lock(td);
	old_cpu = PCPU_GET(cpuid);
	old_pinned = td->td_pinned;
	was_bound = sched_is_bound(td);
	sched_unbind(td);
	td->td_pinned = 0;

	CPU_FOREACH(cpu)
		epoch->e_drain_count++;
	CPU_FOREACH(cpu) {
		er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
		sched_bind(td, cpu);
		epoch_call(epoch, &epoch_drain_cb, &er->er_drain_ctx);
	}

	/* restore CPU binding, if any */
	if (was_bound != 0) {
		sched_bind(td, old_cpu);
	} else {
		/* get thread back to initial CPU, if any */
		if (old_pinned != 0)
			sched_bind(td, old_cpu);
		sched_unbind(td);
	}
	/* restore pinned after bind */
	td->td_pinned = old_pinned;

	thread_unlock(td);

	while (epoch->e_drain_count != 0)
		msleep(epoch, &epoch->e_drain_mtx, PZERO, "EDRAIN", 0);

	mtx_unlock(&epoch->e_drain_mtx);
	sx_xunlock(&epoch->e_drain_sx);

	PICKUP_GIANT();
}