/*
 * Copyright 2011-2015 Samy Al Bahra.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The implementation here is inspired by the work described in:
 *   Fraser, K. 2004. Practical Lock-Freedom. PhD Thesis, University
 *   of Cambridge Computing Laboratory.
 */

#include <ck_backoff.h>
#include <ck_cc.h>
#include <ck_epoch.h>
#include <ck_pr.h>
#include <ck_stack.h>
#include <ck_stdbool.h>
#include <ck_string.h>

/*
 * Only three distinct values are used for reclamation, but reclamation occurs
 * at e+2 rather than e+1. Any thread in a "critical section" would have
 * acquired some snapshot (e) of the global epoch value (e_g) and set an active
 * flag. Any hazardous references will only occur after a full memory barrier.
 * For example, assume an initial e_g value of 1, e value of 0 and active value
 * of 0.
 *
 * ck_epoch_begin(...)
 *   e = e_g
 *   active = 1
 *   memory_barrier();
 *
 * Any serialized reads may observe e = 0 or e = 1 with active = 0, or e = 0 or
 * e = 1 with active = 1. The e_g value can only go from 1 to 2 if every thread
 * has already observed the value of "1" (or the value we are incrementing
 * from). This guarantees us that for any given value e_g, any threads within
 * critical sections (referred to as "active" threads from here on) would have
 * an e value of e_g-1 or e_g. This also means that hazardous references may be
 * shared in both e_g-1 and e_g even if they are logically deleted in e_g.
 *
 * For example, assume all threads have an e value of e_g. Another thread may
 * increment e_g to e_g+1. Older threads may have a reference to an object
 * which is only deleted in e_g+1. It could be that reader threads are
 * executing some hash table look-ups, while some other writer thread (which
 * causes the epoch counter tick) actually deletes the same items that reader
 * threads are looking up (this writer thread having an e value of e_g+1).
 * This is possible if the writer thread re-observes the epoch after the
 * counter tick.
 *
 * Pseudo-code for writer:
 *   ck_epoch_begin()
 *   ht_delete(x)
 *   ck_epoch_end()
 *   ck_epoch_begin()
 *   ht_delete(x)
 *   ck_epoch_end()
 *
 * Pseudo-code for reader:
 *   for (;;) {
 *      x = ht_lookup(x)
 *      ck_pr_inc(&x->value);
 *   }
 *
 * Of course, it is also possible for references logically deleted at e_g-1 to
 * still be accessed at e_g, as threads are "active" at the same time
 * (real-world time) mutating shared objects.
 *
 * Now, if the epoch counter is ticked to e_g+1, then no new hazardous
 * references could exist to objects logically deleted at e_g-1. The reason for
 * this is that at e_g+1, all epoch read-side critical sections started at
 * e_g-1 must have been completed. If any epoch read-side critical sections at
 * e_g-1 were still active, then we would never increment to e_g+1 (active != 0
 * ^ e != e_g).  Additionally, e_g may still have hazardous references to
 * objects logically deleted at e_g-1, which means objects logically deleted at
 * e_g-1 cannot be deleted at e_g+1 unless all threads have observed e_g+1
 * (since it is valid for active threads to be at e_g and threads at e_g still
 * require safe memory accesses).
 *
 * However, at e_g+2, all active threads must be either at e_g+1 or e_g+2.
 * Though e_g+2 may share hazardous references with e_g+1, and e_g+1 shares
 * hazardous references with e_g, no active threads are at e_g or e_g-1. This
 * means no hazardous references could exist to objects deleted at e_g-1 (at
 * e_g+2).
 *
 * To summarize these important points,
 *   1) Active threads will always have a value of e_g or e_g-1.
 *   2) Items that are logically deleted at e_g or e_g-1 cannot be physically
 *      deleted.
 *   3) Objects logically deleted at e_g-1 can be physically destroyed at e_g+2
 *      or at e_g+1 if no threads are at e_g.
 *
 * Last but not least, if we are at e_g+2, then no active thread is at e_g,
 * which means it is safe to apply modulo-3 arithmetic to the e_g value in
 * order to re-use e_g to represent the e_g+3 state. This means it is
 * sufficient to represent e_g using only the values 0, 1 or 2. Every time a
 * thread re-visits an e_g (which can be determined with a non-empty deferral
 * list) it can assume objects in the e_g deferral list involved at least three
 * e_g transitions and are thus safe for physical deletion.
 *
 * Blocking semantics for epoch reclamation have additional restrictions.
 * Though we only require three deferral lists, reasonable blocking semantics
 * must be able to more gracefully handle bursty write work-loads which could
 * easily cause e_g wrap-around if modulo-3 arithmetic is used. This allows for
 * easy-to-trigger live-lock situations. The work-around for this is to not
 * apply modulo arithmetic to e_g but only to deferral list indexing.
 */
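
/*
 * Illustrative usage sketch (not part of this file's implementation): how
 * the scheme above is typically driven through the public API declared in
 * ck_epoch.h. The example_* names and the epoch_entry member are
 * hypothetical stand-ins.
 *
 *   ck_epoch_t epoch;
 *   ck_epoch_record_t record;
 *
 *   ck_epoch_init(&epoch);
 *   ck_epoch_register(&epoch, &record, NULL);
 *
 *   Read side:
 *     ck_epoch_begin(&record, NULL);
 *     example_object_t *o = example_lookup(&ht, key);
 *     ... read from o ...
 *     ck_epoch_end(&record, NULL);
 *
 *   Write side:
 *     example_remove(&ht, o);                        logical deletion
 *     ck_epoch_call(&record, &o->epoch_entry, example_destructor);
 *     ck_epoch_poll(&record);                        best-effort reclamation
 */
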
#define CK_EPOCH_GRACE 3U

/*
 * CK_EPOCH_LENGTH must be a power of 2 (because (CK_EPOCH_LENGTH - 1) is used
 * as a mask), and it must be at least 3 (see the comments above).
 */
#if (CK_EPOCH_LENGTH < 3 || (CK_EPOCH_LENGTH & (CK_EPOCH_LENGTH - 1)) != 0)
#error "CK_EPOCH_LENGTH must be a power of 2 and >= 3"
#endif

enum {
	CK_EPOCH_STATE_USED = 0,
	CK_EPOCH_STATE_FREE = 1
};

CK_STACK_CONTAINER(struct ck_epoch_record, record_next,
    ck_epoch_record_container)
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
    ck_epoch_entry_container)

#define CK_EPOCH_SENSE_MASK	(CK_EPOCH_SENSE - 1)

bool
_ck_epoch_delref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
	struct ck_epoch_ref *current, *other;
	unsigned int i = section->bucket;

	current = &record->local.bucket[i];
	current->count--;

	if (current->count > 0)
		return false;

	/*
	 * If the current bucket no longer has any references, then
	 * determine whether we have already transitioned into a newer
	 * epoch. If so, then make sure to update our shared snapshot
	 * to allow for forward progress.
	 *
	 * If no other active bucket exists, then the record will go
	 * inactive in order to allow for forward progress.
	 */
	other = &record->local.bucket[(i + 1) & CK_EPOCH_SENSE_MASK];
	if (other->count > 0 &&
	    ((int)(current->epoch - other->epoch) < 0)) {
		/*
		 * The other epoch value is actually the newest,
		 * transition to it.
		 */
		ck_pr_store_uint(&record->epoch, other->epoch);
	}

	return true;
}

void
_ck_epoch_addref(struct ck_epoch_record *record,
    struct ck_epoch_section *section)
{
	struct ck_epoch *global = record->global;
	struct ck_epoch_ref *ref;
	unsigned int epoch, i;

	epoch = ck_pr_load_uint(&global->epoch);
	i = epoch & CK_EPOCH_SENSE_MASK;
	ref = &record->local.bucket[i];

	if (ref->count++ == 0) {
#ifndef CK_MD_TSO
		struct ck_epoch_ref *previous;

		/*
		 * The system has already ticked. If another non-zero bucket
		 * exists, make sure to order our observations with respect
		 * to it. Otherwise, it is possible to acquire a reference
		 * from the previous epoch generation.
		 *
		 * On TSO architectures, the monotonicity of the global
		 * counter and load-{store, load} ordering are sufficient to
		 * guarantee this ordering.
		 */
		previous = &record->local.bucket[(i + 1) &
		    CK_EPOCH_SENSE_MASK];
		if (previous->count > 0)
			ck_pr_fence_acqrel();
#endif /* !CK_MD_TSO */

		/*
		 * If this is a new reference into the current
		 * bucket then cache the associated epoch value.
		 */
		ref->epoch = epoch;
	}

	section->bucket = i;
	return;
}

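/*
 * Illustrative sketch (not part of this file's implementation):
 * _ck_epoch_addref and _ck_epoch_delref back the section-based interface.
 * Passing a ck_epoch_section_t to ck_epoch_begin allows nested critical
 * sections; the two sense buckets above let an inner section observe a
 * newer epoch while an outer section still pins an older one.
 *
 *   ck_epoch_section_t outer, inner;
 *
 *   ck_epoch_begin(&record, &outer);
 *   ck_epoch_begin(&record, &inner);    may land in the other bucket
 *   ck_epoch_end(&record, &inner);
 *   ck_epoch_end(&record, &outer);      record goes inactive here
 */
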
void
ck_epoch_init(struct ck_epoch *global)
{

	ck_stack_init(&global->records);
	global->epoch = 1;
	global->n_free = 0;
	ck_pr_fence_store();
	return;
}

struct ck_epoch_record *
ck_epoch_recycle(struct ck_epoch *global, void *ct)
{
	struct ck_epoch_record *record;
	ck_stack_entry_t *cursor;
	unsigned int state;

	if (ck_pr_load_uint(&global->n_free) == 0)
		return NULL;

	CK_STACK_FOREACH(&global->records, cursor) {
		record = ck_epoch_record_container(cursor);

		if (ck_pr_load_uint(&record->state) == CK_EPOCH_STATE_FREE) {
			/* Serialize with respect to deferral list clean-up. */
			ck_pr_fence_load();
			state = ck_pr_fas_uint(&record->state,
			    CK_EPOCH_STATE_USED);
			if (state == CK_EPOCH_STATE_FREE) {
				ck_pr_dec_uint(&global->n_free);
				ck_pr_store_ptr(&record->ct, ct);

				/*
				 * The context pointer is ordered by a
				 * subsequent protected section.
				 */
				return record;
			}
		}
	}

	return NULL;
}

void
ck_epoch_register(struct ck_epoch *global, struct ck_epoch_record *record,
    void *ct)
{
	size_t i;

	record->global = global;
	record->state = CK_EPOCH_STATE_USED;
	record->active = 0;
	record->epoch = 0;
	record->n_dispatch = 0;
	record->n_peak = 0;
	record->n_pending = 0;
	record->ct = ct;
	memset(&record->local, 0, sizeof record->local);

	for (i = 0; i < CK_EPOCH_LENGTH; i++)
		ck_stack_init(&record->pending[i]);

	ck_pr_fence_store();
	ck_stack_push_upmc(&global->records, &record->record_next);
	return;
}

void
ck_epoch_unregister(struct ck_epoch_record *record)
{
	struct ck_epoch *global = record->global;
	size_t i;

	record->active = 0;
	record->epoch = 0;
	record->n_dispatch = 0;
	record->n_peak = 0;
	record->n_pending = 0;
	memset(&record->local, 0, sizeof record->local);

	for (i = 0; i < CK_EPOCH_LENGTH; i++)
		ck_stack_init(&record->pending[i]);

	ck_pr_store_ptr(&record->ct, NULL);
	ck_pr_fence_store();
	ck_pr_store_uint(&record->state, CK_EPOCH_STATE_FREE);
	ck_pr_inc_uint(&global->n_free);
	return;
}

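/*
 * Illustrative sketch (not part of this file's implementation): records are
 * typically recycled rather than allocated for every thread. A NULL context
 * pointer is used here as a hypothetical placeholder.
 *
 *   ck_epoch_record_t *r;
 *
 *   r = ck_epoch_recycle(&epoch, NULL);
 *   if (r == NULL) {
 *           r = malloc(sizeof *r);
 *           ck_epoch_register(&epoch, r, NULL);
 *   }
 *   ... use r for read-side sections and deferrals ...
 *   ck_epoch_unregister(r);    returns the record to the free pool
 */
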
static struct ck_epoch_record *
ck_epoch_scan(struct ck_epoch *global,
    struct ck_epoch_record *cr,
    unsigned int epoch,
    bool *af)
{
	ck_stack_entry_t *cursor;

	if (cr == NULL) {
		cursor = CK_STACK_FIRST(&global->records);
		*af = false;
	} else {
		cursor = &cr->record_next;
		*af = true;
	}

	while (cursor != NULL) {
		unsigned int state, active;

		cr = ck_epoch_record_container(cursor);

		state = ck_pr_load_uint(&cr->state);
		if (state & CK_EPOCH_STATE_FREE) {
			cursor = CK_STACK_NEXT(cursor);
			continue;
		}

		active = ck_pr_load_uint(&cr->active);
		*af |= active;

		if (active != 0 && ck_pr_load_uint(&cr->epoch) != epoch)
			return cr;

		cursor = CK_STACK_NEXT(cursor);
	}

	return NULL;
}

static unsigned int
ck_epoch_dispatch(struct ck_epoch_record *record, unsigned int e, ck_stack_t *deferred)
{
	unsigned int epoch = e & (CK_EPOCH_LENGTH - 1);
	ck_stack_entry_t *head, *next, *cursor;
	unsigned int n_pending, n_peak;
	unsigned int i = 0;

	head = ck_stack_batch_pop_upmc(&record->pending[epoch]);
	for (cursor = head; cursor != NULL; cursor = next) {
		struct ck_epoch_entry *entry =
		    ck_epoch_entry_container(cursor);

		next = CK_STACK_NEXT(cursor);
		if (deferred != NULL)
			ck_stack_push_spnc(deferred, &entry->stack_entry);
		else
			entry->function(entry);

		i++;
	}

	n_peak = ck_pr_load_uint(&record->n_peak);
	n_pending = ck_pr_load_uint(&record->n_pending);

	/*
	 * We don't require accuracy around peak calculation. Note the new
	 * pending count is what must be stored; storing n_peak back into
	 * itself would be a no-op.
	 */
	if (n_pending > n_peak)
		ck_pr_store_uint(&record->n_peak, n_pending);

	if (i > 0) {
		ck_pr_add_uint(&record->n_dispatch, i);
		ck_pr_sub_uint(&record->n_pending, i);
	}

	return i;
}

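/*
 * Illustrative sketch (not part of this file's implementation): when a
 * deferred stack is supplied, dispatch batches entries onto it instead of
 * invoking their callbacks, so the caller can run them outside the
 * reclamation path, e.g. after ck_epoch_poll_deferred below. This assumes
 * ck_stack.h's safe-iteration macro, since a callback may free its entry.
 *
 *   ck_stack_t deferred;
 *   ck_stack_entry_t *cursor, *next;
 *
 *   ck_stack_init(&deferred);
 *   if (ck_epoch_poll_deferred(&record, &deferred) == true) {
 *           CK_STACK_FOREACH_SAFE(&deferred, cursor, next) {
 *                   ck_epoch_entry_t *e = ck_epoch_entry_container(cursor);
 *                   e->function(e);
 *           }
 *   }
 */
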
/*
 * Reclaim all objects associated with a record.
 */
void
ck_epoch_reclaim(struct ck_epoch_record *record)
{
	unsigned int epoch;

	for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
		ck_epoch_dispatch(record, epoch, NULL);

	return;
}

CK_CC_FORCE_INLINE static void
epoch_block(struct ck_epoch *global, struct ck_epoch_record *cr,
    ck_epoch_wait_cb_t *cb, void *ct)
{

	if (cb != NULL)
		cb(global, cr, ct);

	return;
}

/*
 * This function must not be called within a read section.
 */
void
ck_epoch_synchronize_wait(struct ck_epoch *global,
    ck_epoch_wait_cb_t *cb, void *ct)
{
	struct ck_epoch_record *cr;
	unsigned int delta, epoch, goal, i;
	bool active;

	ck_pr_fence_memory();

	/*
	 * The observation of the global epoch must be ordered with respect to
	 * all prior operations. The re-ordering of loads is permitted given
	 * monotonicity of the global epoch counter.
	 *
	 * If UINT_MAX concurrent mutations were to occur then it is possible
	 * to encounter an ABA-issue. If this is a concern, consider tuning
	 * write-side concurrency.
	 */
	delta = epoch = ck_pr_load_uint(&global->epoch);
	goal = epoch + CK_EPOCH_GRACE;

	for (i = 0, cr = NULL; i < CK_EPOCH_GRACE - 1; cr = NULL, i++) {
		bool r;

		/*
		 * Determine whether all threads have observed the current
		 * epoch with respect to the updates on invocation.
		 */
		while (cr = ck_epoch_scan(global, cr, delta, &active),
		    cr != NULL) {
			unsigned int e_d;

			ck_pr_stall();

			/*
			 * Another writer may have already observed a grace
			 * period.
			 */
			e_d = ck_pr_load_uint(&global->epoch);
			if (e_d == delta) {
				epoch_block(global, cr, cb, ct);
				continue;
			}

			/*
			 * If the epoch has been updated, we may have already
			 * met our goal.
			 */
			delta = e_d;
			if ((goal > epoch) & (delta >= goal))
				goto leave;

			epoch_block(global, cr, cb, ct);

			/*
			 * If the epoch has been updated, then a grace period
			 * requires that all threads are observed idle at the
			 * same epoch.
			 */
			cr = NULL;
		}

		/*
		 * If we have observed all threads as inactive, then we assume
		 * we are at a grace period.
		 */
		if (active == false)
			break;

		/*
		 * Increment current epoch. CAS semantics are used to eliminate
		 * increment operations for synchronization that occurs for the
		 * same global epoch value snapshot.
		 *
		 * If we can guarantee there will only be one active barrier or
		 * epoch tick at a given time, then it is sufficient to use an
		 * increment operation. In a multi-barrier workload, however,
		 * it is possible to overflow the epoch value if we apply
		 * modulo-3 arithmetic.
		 */
		r = ck_pr_cas_uint_value(&global->epoch, delta, delta + 1,
		    &delta);

		/* Order subsequent thread active checks. */
		ck_pr_fence_atomic_load();

		/*
		 * If CAS has succeeded, then set delta to latest snapshot.
		 * Otherwise, we have just acquired latest snapshot.
		 */
		delta = delta + r;
	}

	/*
	 * A majority of use-cases will not require full barrier semantics.
	 * However, if non-temporal instructions are used, full barrier
	 * semantics are necessary.
	 */
leave:
	ck_pr_fence_memory();
	return;
}

void
ck_epoch_synchronize(struct ck_epoch_record *record)
{

	ck_epoch_synchronize_wait(record->global, NULL, NULL);
	return;
}

void
ck_epoch_barrier(struct ck_epoch_record *record)
{

	ck_epoch_synchronize(record);
	ck_epoch_reclaim(record);
	return;
}

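/*
 * Illustrative sketch (not part of this file's implementation):
 * ck_epoch_synchronize only waits for a grace period, while
 * ck_epoch_barrier additionally reclaims this record's deferrals.
 * The example_* names and the epoch_entry member are hypothetical.
 *
 *   example_remove(&ht, o);             logical deletion
 *   ck_epoch_synchronize(&record);      grace period has elapsed
 *   example_destroy(o);                 no hazardous references remain
 *
 * versus deferring destruction and forcing it through:
 *
 *   ck_epoch_call(&record, &o->epoch_entry, example_destroy_cb);
 *   ck_epoch_barrier(&record);          waits, then dispatches deferrals
 */
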
void
ck_epoch_barrier_wait(struct ck_epoch_record *record, ck_epoch_wait_cb_t *cb,
    void *ct)
{

	ck_epoch_synchronize_wait(record->global, cb, ct);
	ck_epoch_reclaim(record);
	return;
}

/*
 * It may be worth it to actually apply these deferral semantics to an epoch
 * that was observed at ck_epoch_call time. The problem is that the latter
 * would require a full fence.
 *
 * ck_epoch_call will dispatch to the latest epoch snapshot that was observed.
 * There are cases where it will fail to reclaim as early as it could. If this
 * becomes a problem, we could actually use a heap for epoch buckets but that
 * is far from ideal too.
 */
bool
ck_epoch_poll_deferred(struct ck_epoch_record *record, ck_stack_t *deferred)
{
	bool active;
	unsigned int epoch;
	struct ck_epoch_record *cr = NULL;
	struct ck_epoch *global = record->global;
	unsigned int n_dispatch;

	epoch = ck_pr_load_uint(&global->epoch);

	/* Serialize epoch snapshots with respect to global epoch. */
	ck_pr_fence_memory();

	/*
	 * At this point, epoch is the current global epoch value.
	 * There may or may not be active threads which observed epoch - 1
	 * (ck_epoch_scan() will tell us that). However, there should be
	 * no active threads which observed epoch - 2.
	 *
	 * Note that checking epoch - 2 is necessary, as race conditions can
	 * allow another thread to increment the global epoch before this
	 * thread runs.
	 */
	n_dispatch = ck_epoch_dispatch(record, epoch - 2, deferred);

	cr = ck_epoch_scan(global, cr, epoch, &active);
	if (cr != NULL)
		return (n_dispatch > 0);

	/* We are at a grace period if all threads are inactive. */
	if (active == false) {
		record->epoch = epoch;
		for (epoch = 0; epoch < CK_EPOCH_LENGTH; epoch++)
			ck_epoch_dispatch(record, epoch, deferred);

		return true;
	}

	/*
	 * If an active thread exists, rely on epoch observation.
	 *
	 * All the active threads entered the epoch section during
	 * the current epoch. Therefore, we can now run the handlers
	 * for the immediately preceding epoch and attempt to
	 * advance the epoch if it hasn't been already.
	 */
	(void)ck_pr_cas_uint(&global->epoch, epoch, epoch + 1);

	ck_epoch_dispatch(record, epoch - 1, deferred);
	return true;
}

bool
ck_epoch_poll(struct ck_epoch_record *record)
{

	return ck_epoch_poll_deferred(record, NULL);
}
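
/*
 * Illustrative sketch (not part of this file's implementation):
 * ck_epoch_poll is the non-blocking alternative to ck_epoch_barrier,
 * suitable for amortizing reclamation over a write-heavy loop.
 * example_mutate, example_cb and the epoch_entry member are hypothetical.
 *
 *   for (;;) {
 *           ck_epoch_begin(&record, NULL);
 *           entry = example_mutate(&ht);
 *           ck_epoch_end(&record, NULL);
 *
 *           ck_epoch_call(&record, &entry->epoch_entry, example_cb);
 *           (void)ck_epoch_poll(&record);   best-effort, never blocks
 *   }
 */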