/*
 * Copyright 2011-2015 Samy Al Bahra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The implementation here is inspired from the work described in:
 *   Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
 *   of Cambridge Computing Laboratory.
 */

#include <ck_backoff.h>
#include <ck_cc.h>
#include <ck_epoch.h>
#include <ck_pr.h>
#include <ck_stack.h>
#include <ck_stdbool.h>
#include <ck_string.h>

/*
 * Only three distinct values are used for reclamation, but reclamation occurs
 * at e+2 rather than e+1. Any thread in a "critical section" would have
 * acquired some snapshot (e) of the global epoch value (e_g) and set an active
 * flag. Any hazardous references will only occur after a full memory barrier.
 * For example, assume an initial e_g value of 1, an e value of 0 and an
 * active value of 0.
 *
 * ck_epoch_begin(...)
 *   e = e_g
 *   active = 1
 *   memory_barrier();
 *
 * Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
 * e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
 * has already observed the value of "1" (or the value we are incrementing
 * from). This guarantees us that for any given value e_g, any threads within
 * critical sections (referred to as "active" threads from here on) would have
 * an e value of e_g-1 or e_g. This also means that hazardous references may be
 * shared in both e_g-1 and e_g even if they are logically deleted in e_g.
 *
 * For example, assume all threads have an e value of e_g. Another thread may
 * increment e_g to e_g+1. Older threads may have a reference to an object
 * which is only deleted in e_g+1. It could be that reader threads are
 * executing some hash table look-ups, while some other writer thread (which
 * causes the epoch counter tick) actually deletes the same items that reader
 * threads are looking up (this writer thread having an e value of e_g+1).
 * This is possible if the writer thread re-observes the epoch after the
 * counter tick.
 *
 * Pseudo-code for writer:
 *   ck_epoch_begin()
 *   ht_delete(x)
 *   ck_epoch_end()
 *   ck_epoch_begin()
 *   ht_delete(x)
 *   ck_epoch_end()
 *
 * Pseudo-code for reader:
 *   for (;;) {
 *     x = ht_lookup(x)
 *     ck_pr_inc(&x->value);
 *   }
 *
 * Of course, it is also possible for references logically deleted at e_g-1 to
 * still be accessed at e_g as threads are "active" at the same time
 * (real-world time) mutating shared objects.
 *
 * Now, if the epoch counter is ticked to e_g+1, then no new hazardous
 * references could exist to objects logically deleted at e_g-1. The reason for
 * this is that at e_g+1, all epoch read-side critical sections started at
 * e_g-1 must have been completed. If any epoch read-side critical sections at
 * e_g-1 were still active, then we would never increment to e_g+1 (active != 0
 * ^ e != e_g). Additionally, e_g may still have hazardous references to
 * objects logically deleted at e_g-1 which means objects logically deleted at
 * e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
 * (since it is valid for active threads to be at e_g and threads at e_g still
 * require safe memory accesses).
 *
 * However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
 * Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
 * hazardous references with e_g, no active threads are at e_g or e_g-1. This
 * means no hazardous references could exist to objects deleted at e_g-1 (at
 * e_g+2).
 *
 * To summarize these important points,
 *   1) Active threads will always have a value of e_g or e_g-1.
 *   2) Items that are logically deleted at e_g or e_g-1 cannot be physically
 *      deleted.
 *   3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
 *      or at e_g+1 if no threads are at e_g.
 *
 * Last but not least, if we are at e_g+2, then no active thread is at e_g
 * which means it is safe to apply modulo-3 arithmetic to the e_g value in
 * order to re-use e_g to represent the e_g+3 state. This means it is
 * sufficient to represent e_g using only the values 0, 1 or 2. Every time a
 * thread re-visits an e_g (which can be determined with a non-empty deferral
 * list) it can assume objects in the e_g deferral list involved at least
 * three e_g transitions and are thus safe for physical deletion.
 *
 * Blocking semantics for epoch reclamation have additional restrictions.
 * Though we only require three deferral lists, reasonable blocking semantics
 * must be able to more gracefully handle bursty write work-loads which could
 * easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
 * easy-to-trigger live-lock situations. The work-around to this is to not
 * apply modulo arithmetic to e_g but only to deferral list indexing.
 */
#define CK_EPOCH_GRACE 3U

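/*
 * A rough caller-side usage sketch of the scheme above. The ht_* names and
 * the destructor are purely illustrative; the ck_epoch_* calls are the
 * public interface declared in ck_epoch.h:
 *
 *   Reader:
 *     ck_epoch_begin(record, NULL);
 *     e = ht_lookup(ht, key);
 *     use(e);
 *     ck_epoch_end(record, NULL);
 *
 *   Writer:
 *     e = ht_remove(ht, key);
 *     ck_epoch_call(record, &e->epoch_entry, e_destructor);
 *     ck_epoch_poll(record);
 *
 * The destructor is only dispatched once a grace period (as described above)
 * has elapsed, so no reader can still hold a hazardous reference to e.
 */
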
/*
 * CK_EPOCH_LENGTH must be a power of 2 (because CK_EPOCH_LENGTH - 1 is used
 * as a mask), and it must be at least 3 (see the comments above).
 */
#if (CK_EPOCH_LENGTH < 3 || (CK_EPOCH_LENGTH & (CK_EPOCH_LENGTH - 1)) != 0)
#error "CK_EPOCH_LENGTH must be a power of 2 and >= 3"
#endif

enum {
	CK_EPOCH_STATE_USED = 0,
	CK_EPOCH_STATE_FREE = 1
};

CK_STACK_CONTAINER(struct ck_epoch_record, record_next,
    ck_epoch_record_container)
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
    ck_epoch_entry_container)

#define CK_EPOCH_SENSE_MASK	(CK_EPOCH_SENSE - 1)

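/*
 * Drop the reference held by a section on its local sense bucket. Returns
 * true if this was the last reference in that bucket, in which case the
 * record's epoch snapshot may be advanced to the other (newer) bucket's
 * epoch so the record does not hold back grace-period detection.
 */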
bool
_ck_epoch_delref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
	struct ck_epoch_ref *current, *other;
	unsigned int i = section->bucket;

	current = &record->local.bucket[i];
	current->count--;

	if (current->count > 0)
		return false;

	/*
	 * If the current bucket no longer has any references, then
	 * determine whether we have already transitioned into a newer
	 * epoch. If so, then make sure to update our shared snapshot
	 * to allow for forward progress.
	 *
	 * If no other active bucket exists, then the record will go
	 * inactive in order to allow for forward progress.
	 */
	other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK];
	if (other->count > 0 &&
	    ((int)(current->epoch - other->epoch) < 0)) {
		/*
		 * The other epoch value is actually the newest,
		 * transition to it.
		 */
		ck_pr_store_uint(&record->epoch, other->epoch);
	}

	return true;
}

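/*
 * Acquire a reference in the local bucket corresponding to the current
 * global epoch, and remember that bucket in the section so that
 * _ck_epoch_delref can later release it.
 */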
void
_ck_epoch_addref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
	struct ck_epoch *global = record->global;
	struct ck_epoch_ref *ref;
	unsigned int epoch, i;

	epoch = ck_pr_load_uint(&global->epoch);
	i = epoch & CK_EPOCH_SENSE_MASK;
	ref = &record->local.bucket[i];

	if (ref->count++ == 0) {
#ifndef CK_MD_TSO
		struct ck_epoch_ref *previous;

		/*
		 * The system has already ticked. If another non-zero bucket
		 * exists, make sure to order our observations with respect
		 * to it. Otherwise, it is possible to acquire a reference
		 * from the previous epoch generation.
		 *
		 * On TSO architectures, the monotonicity of the global
		 * counter and load-{store, load} ordering are sufficient to
		 * guarantee this ordering.
		 */
		previous = &record->local.bucket[(i + 1) &
		    CK_EPOCH_SENSE_MASK];
		if (previous->count > 0)
			ck_pr_fence_acqrel();
#endif /* !CK_MD_TSO */

		/*
		 * If this is a new reference into the current
		 * bucket then cache the associated epoch value.
		 */
		ref->epoch = epoch;
	}

	section->bucket = i;
	return;
}

void
ck_epoch_init(struct ck_epoch *global)
{

	ck_stack_init(&global->records);
	global->epoch = 1;
	global->n_free = 0;
	ck_pr_fence_store();
	return;
}

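/*
 * Attempt to reuse a previously unregistered record from the global record
 * list rather than requiring the caller to provide a new one. Returns NULL
 * if no free record could be claimed.
 */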
struct ck_epoch_record *
ck_epoch_recycle(struct ck_epoch *global, void *ct)
{
	struct ck_epoch_record *record;
	ck_stack_entry_t *cursor;
	unsigned int state;

	if (ck_pr_load_uint(&global->n_free) == 0)
		return NULL;

	CK_STACK_FOREACH(&global->records, cursor) {
		record = ck_epoch_record_container(cursor);

		if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
			/* Serialize with respect to deferral list clean-up. */
			ck_pr_fence_load();
			state = ck_pr_fas_uint(&record->state,
			    CK_EPOCH_STATE_USED);
			if (state == CK_EPOCH_STATE_FREE) {
				ck_pr_dec_uint(&global->n_free);
				ck_pr_store_ptr(&record->ct, ct);

				/*
				 * The context pointer is ordered by a
				 * subsequent protected section.
				 */
				return record;
			}
		}
	}

	return NULL;
}

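/*
 * Associate a caller-provided record (and optional context pointer) with the
 * global epoch object and publish it on the global record list.
 */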
void
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
    void *ct)
{
	size_t i;

	record->global = global;
	record->state = CK_EPOCH_STATE_USED;
	record->active = 0;
	record->epoch = 0;
	record->n_dispatch = 0;
	record->n_peak = 0;
	record->n_pending = 0;
	record->ct = ct;
	memset(&record->local, 0, sizeof record->local);

	for (i = 0; i < CK_EPOCH_LENGTH; i++)
		ck_stack_init(&record->pending[i]);

	ck_pr_fence_store();
	ck_stack_push_upmc(&global->records, &record->record_next);
	return;
}

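/*
 * Reset a record and mark it free so that a later ck_epoch_recycle call may
 * claim and reuse it.
 */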
void
ck_epoch_unregister(struct ck_epoch_record *record)
{
	struct ck_epoch *global = record->global;
	size_t i;

	record->active = 0;
	record->epoch = 0;
	record->n_dispatch = 0;
	record->n_peak = 0;
	record->n_pending = 0;
	memset(&record->local, 0, sizeof record->local);

	for (i = 0; i < CK_EPOCH_LENGTH; i++)
		ck_stack_init(&record->pending[i]);

	ck_pr_store_ptr(&record->ct, NULL);
	ck_pr_fence_store();
	ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
	ck_pr_inc_uint(&global->n_free);
	return;
}

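/*
 * Scan the global record list, starting after cr (or from the beginning if
 * cr is NULL), and return the first active record that has not yet observed
 * the given epoch. *af is updated to reflect whether any active record was
 * encountered. Returns NULL once every record has been examined.
 */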
static struct ck_epoch_record *
ck_epoch_scan(struct ck_epoch *global,
    struct ck_epoch_record *cr,
    unsigned int epoch,
    bool *af)
{
	ck_stack_entry_t *cursor;

	if (cr == NULL) {
		cursor = CK_STACK_FIRST(&global->records);
		*af = false;
	} else {
		cursor = &cr->record_next;
		*af = true;
	}

	while (cursor != NULL) {
		unsigned int state, active;

		cr = ck_epoch_record_container(cursor);

		state = ck_pr_load_uint(&cr->state);
		if (state & CK_EPOCH_STATE_FREE) {
			cursor = CK_STACK_NEXT(cursor);
			continue;
		}

		active = ck_pr_load_uint(&cr->active);
		*af |= active;

		if (active != 0 && ck_pr_load_uint(&cr->epoch) != epoch)
			return cr;

		cursor = CK_STACK_NEXT(cursor);
	}

	return NULL;
}

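/*
 * Dispatch the deferral bucket corresponding to epoch e: either execute each
 * pending entry's callback, or, if a deferred stack is supplied, move the
 * entries onto it for the caller to process later. Returns the number of
 * entries handled.
 */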
static unsigned int
ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e,
    ck_stack_t *deferred)
{
	unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
	ck_stack_entry_t *head, *next, *cursor;
	unsigned int n_pending, n_peak;
	unsigned int i = 0;

	head = ck_stack_batch_pop_upmc(&record->pending[epoch]);
	for (cursor = head; cursor != NULL; cursor = next) {
		struct ck_epoch_entry *entry =
		    ck_epoch_entry_container(cursor);

		next = CK_STACK_NEXT(cursor);
		if (deferred != NULL)
			ck_stack_push_spnc(deferred, &entry->stack_entry);
		else
			entry->function(entry);

		i++;
	}

	n_peak = ck_pr_load_uint(&record->n_peak);
	n_pending = ck_pr_load_uint(&record->n_pending);

	/* We don't require accuracy around peak calculation. */
	if (n_pending > n_peak)
		ck_pr_store_uint(&record->n_peak, n_pending);

	if (i > 0) {
		ck_pr_add_uint(&record->n_dispatch, i);
		ck_pr_sub_uint(&record->n_pending, i);
	}

	return i;
}

/*
 * Reclaim all objects associated with a record.
 */
void
ck_epoch_reclaim(struct ck_epoch_record *record)
{
	unsigned int epoch;

	for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
		ck_epoch_dispatch(record, epoch, NULL);

	return;
}

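/*
 * Invoke the caller-supplied wait callback, if any. This is the hook through
 * which ck_epoch_synchronize_wait lets the caller block or back off while a
 * grace period is still pending.
 */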
CK_CC_FORCE_INLINE static void
epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr,
    ck_epoch_wait_cb_t *cb, void *ct)
{

	if (cb != NULL)
		cb(global, cr, ct);

	return;
}

/*
 * This function must not be called within a read section.
 */
void
ck_epoch_synchronize_wait(struct ck_epoch *global,
    ck_epoch_wait_cb_t *cb, void *ct)
{
	struct ck_epoch_record *cr;
	unsigned int delta, epoch, goal, i;
	bool active;

	ck_pr_fence_memory();

	/*
	 * The observation of the global epoch must be ordered with respect to
	 * all prior operations. The re-ordering of loads is permitted given
	 * the monotonicity of the global epoch counter.
	 *
	 * If UINT_MAX concurrent mutations were to occur then it is possible
	 * to encounter an ABA-issue. If this is a concern, consider tuning
	 * write-side concurrency.
	 */
	delta = epoch = ck_pr_load_uint(&global->epoch);
	goal = epoch + CK_EPOCH_GRACE;

	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
		bool r;

		/*
		 * Determine whether all threads have observed the current
		 * epoch with respect to the updates on invocation.
		 */
		while (cr = ck_epoch_scan(global, cr, delta, &active),
		    cr != NULL) {
			unsigned int e_d;

			ck_pr_stall();

			/*
			 * Another writer may have already observed a grace
			 * period.
			 */
			e_d = ck_pr_load_uint(&global->epoch);
			if (e_d == delta) {
				epoch_block(global, cr, cb, ct);
				continue;
			}

			/*
			 * If the epoch has been updated, we may have already
			 * met our goal.
			 */
			delta = e_d;
			if ((goal > epoch) & (delta >= goal))
				goto leave;

			epoch_block(global, cr, cb, ct);

			/*
			 * If the epoch has been updated, then a grace period
			 * requires that all threads are observed idle at the
			 * same epoch.
			 */
			cr = NULL;
		}

		/*
		 * If we have observed all threads as inactive, then we assume
		 * we are at a grace period.
		 */
		if (active == false)
			break;

		/*
		 * Increment current epoch. CAS semantics are used to eliminate
		 * increment operations for synchronization that occurs for the
		 * same global epoch value snapshot.
		 *
		 * If we can guarantee there will only be one active barrier or
		 * epoch tick at a given time, then it is sufficient to use an
		 * increment operation. In a multi-barrier workload, however,
		 * it is possible to overflow the epoch value if we apply
		 * modulo-3 arithmetic.
		 */
		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
		    &delta);

		/* Order subsequent thread active checks. */
		ck_pr_fence_atomic_load();

		/*
		 * If CAS has succeeded, then set delta to latest snapshot.
		 * Otherwise, we have just acquired latest snapshot.
		 */
		delta = delta + r;
	}

	/*
	 * A majority of use-cases will not require full barrier semantics.
	 * However, if non-temporal instructions are used, full barrier
	 * semantics are necessary.
	 */
leave:
	ck_pr_fence_memory();
	return;
}

void
ck_epoch_synchronize(struct ck_epoch_record *record)
{

	ck_epoch_synchronize_wait(record->global, NULL, NULL);
	return;
}

void
ck_epoch_barrier(struct ck_epoch_record *record)
{

	ck_epoch_synchronize(record);
	ck_epoch_reclaim(record);
	return;
}

void
ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb,
    void *ct)
{

	ck_epoch_synchronize_wait(record->global, cb, ct);
	ck_epoch_reclaim(record);
	return;
}

/*
 * It may be worth it to actually apply these deferral semantics to an epoch
 * that was observed at ck_epoch_call time. The problem is that the latter
 * would require a full fence.
 *
 * ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
 * There are cases where it will fail to reclaim as early as it could. If this
 * becomes a problem, we could actually use a heap for epoch buckets but that
 * is far from ideal too.
 */
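/*
 * Best-effort, non-blocking reclamation: dispatch whichever deferral buckets
 * are safe under the epoch rules above, and attempt to advance the global
 * epoch when possible. If a "deferred" stack is supplied, safe entries are
 * pushed onto it rather than executed in place. This never waits on other
 * threads.
 */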
bool
ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
{
	bool active;
	unsigned int epoch;
	struct ck_epoch_record *cr = NULL;
	struct ck_epoch *global = record->global;
	unsigned int n_dispatch;

	epoch = ck_pr_load_uint(&global->epoch);

	/* Serialize epoch snapshots with respect to global epoch. */
	ck_pr_fence_memory();

	/*
	 * At this point, epoch is the current global epoch value.
	 * There may or may not be active threads which observed epoch - 1
	 * (ck_epoch_scan() will tell us that). However, there should be
	 * no active threads which observed epoch - 2.
	 *
	 * Note that checking epoch - 2 is necessary, as race conditions can
	 * allow another thread to increment the global epoch before this
	 * thread runs.
	 */
	n_dispatch = ck_epoch_dispatch(record, epoch - 2, deferred);

	cr = ck_epoch_scan(global, cr, epoch, &active);
	if (cr != NULL)
		return (n_dispatch > 0);

	/* We are at a grace period if all threads are inactive. */
	if (active == false) {
		record->epoch = epoch;
		for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
			ck_epoch_dispatch(record, epoch, deferred);

		return true;
	}

	/*
	 * If an active thread exists, rely on epoch observation.
	 *
	 * All the active threads entered the epoch section during
	 * the current epoch. Therefore, we can now run the handlers
	 * for the immediately preceding epoch and attempt to
	 * advance the epoch if it hasn't been already.
	 */
	(void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1);

	ck_epoch_dispatch(record, epoch - 1, deferred);
	return true;
}

bool
ck_epoch_poll(struct ck_epoch_record *record)
{

	return ck_epoch_poll_deferred(record, NULL);
}