/* $NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $ */

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problem with epoch based memory reclamation also remains:
 * a thread that gets stuck while actively observing an epoch will prevent
 * managed objects (in our case connections and operations) from being
 * freed, potentially causing the process to run out of memory.
 */
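
/*
 * A minimal usage sketch (hypothetical caller; my_dispose and obj are made
 * up for illustration, not part of the tree). A thread observes an epoch
 * while it touches managed objects and retires an object through
 * epoch_append() instead of freeing it directly; the dispose callback runs
 * only once no thread can still observe an epoch in which the object was
 * reachable:
 *
 *     static void my_dispose( void *obj ) { ch_free( obj ); }
 *
 *     epoch_t epoch = epoch_join();
 *     ... work with obj ...
 *     epoch_append( obj, my_dispose );
 *     epoch_leave( epoch );
 */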

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
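
/*
 * With EPOCH_MASK == 4, epochs cycle 0 -> 1 -> 2 -> 3 -> 0; for example
 * EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0, which is what the modular
 * arithmetic above provides.
 */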

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}
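
/*
 * Join the current epoch and return it; callers are expected to pair this
 * with a later epoch_leave() on the returned value. As a side effect, when
 * the previous epoch is no longer observed by anyone, this advances
 * current_epoch and frees any references queued in that old epoch (the
 * freeing happens outside any epoch, after which we join again).
 */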
epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The below is now safe to free outside epochs and we don't want to make
     * the current epoch last any longer than necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}

void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there is no telling how soon (or late) that is going to
     * happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join until we've released epoch_mutex since it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                    &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                    &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads actually finished - which might have happened
     * after we acquired epoch_mutex so ldap_pvt_thread_rdwr_rlock would not
     * catch everything.
     *
     * TODO is to confirm the below:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}
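
/*
 * Attempt to take a long-lived reference on an object whose reference count
 * lives at *refp. The counter is only incremented while it is still
 * non-zero, so a return value of 0 means the object is already being
 * released and must not be used; otherwise the pre-increment value is
 * returned. Presumably the caller observes an epoch at this point so the
 * object cannot be reclaimed from under us in the meantime.
 */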
int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}
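
/*
 * Drop a reference on *refp without ever taking the counter below zero. If
 * our decrement is the one that reaches zero, the object is handed to
 * epoch_append() so its dispose callback only runs once no epoch can still
 * observe it. Returns the value the counter had before the call (0 if there
 * was nothing left to release).
 */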
int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}