/*	$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $	*/

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problems with epoch based memory reclamation are also still
 * present - a thread that gets stuck while observing an epoch will
 * prevent managed objects (in our case connections and operations)
 * from being freed, potentially exhausting memory.
 */
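
/*
 * Rough usage sketch (illustrative only, not lifted from actual call sites):
 * a thread observes an epoch for as long as it accesses shared objects and
 * retires an object via epoch_append() once it is no longer reachable from
 * shared state.  The object and dispose callback names below are
 * hypothetical.
 *
 *     epoch_t epoch = epoch_join();
 *
 *     ... work with shared objects ...
 *     epoch_append( obj, obj_destroy );   freed once no epoch can see it
 *
 *     epoch_leave( epoch );
 */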

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
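
/*
 * With EPOCH_MASK == 4 the epoch counter cycles through the values 0..3,
 * e.g. EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0.  At least three distinct
 * values are needed so that EPOCH_PREV(e), e and EPOCH_NEXT(e) never alias.
 */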

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

ldap_pvt_thread_rdwr_t epoch_mutex;

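/*
 * current_epoch is only advanced under a write lock on epoch_mutex;
 * epoch_threads[e] counts the threads currently observing epoch e and
 * references[e] collects the objects retired while e was the current epoch,
 * to be disposed of once no thread can still be accessing them.
 */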
static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references claimed above are now safe to free outside of any epoch
     * and we don't want to make the current epoch last any longer than
     * necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}

void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there's no telling how soon (or late) that is going to
     * happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join until we've released epoch_mutex since it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a memory-independent read fence to make sure we're reading the
     * state after all threads actually finished - which might have happened
     * after we acquired epoch_mutex so ldap_pvt_thread_rdwr_rlock would not
     * catch everything.
     *
     * TODO: confirm the following:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

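    /*
     * Lock-free push onto the per-epoch list: if another thread beats us to
     * the head, __atomic_compare_exchange updates new->next to the observed
     * head and we simply retry.
     */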
    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}

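/*
 * acquire_ref()/try_release_ref() implement the reference counting side:
 * acquire_ref() only takes a reference if the count is still non-zero, and
 * try_release_ref() hands the object to epoch_append() once the count drops
 * to zero.  Both return the value the counter had before the call.  A
 * hypothetical caller (names illustrative, not from this file) might do:
 *
 *     if ( acquire_ref( &conn->refcnt ) ) {
 *         ... safe to use conn ...
 *         try_release_ref( &conn->refcnt, conn, conn_destroy );
 *     }
 */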
int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}