/*	$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $	*/

/* epoch.c - epoch based memory reclamation */
/* $OpenLDAP$ */
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
 *
 * Copyright 2018-2021 The OpenLDAP Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

/** @file epoch.c
 *
 * Implementation of epoch-based memory reclamation, in principle
 * similar to the algorithm presented in
 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
 *
 * Not completely lock-free at the moment.
 *
 * The usual problem with epoch-based memory reclamation is also still
 * present: a thread that gets stuck while actively observing an epoch
 * prevents managed objects (in our case connections and operations) from
 * being freed, potentially running the process out of memory.
 */
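
/*
 * An illustrative sketch of the intended calling pattern, as far as it can
 * be inferred from the API below: threads bracket any access to managed
 * objects with epoch_join()/epoch_leave(), and objects are retired through
 * epoch_append() instead of being freed directly:
 *
 *     epoch_t epoch = epoch_join();
 *     ... safely dereference connections/operations here ...
 *     epoch_leave( epoch );
 *
 * An object retired with epoch_append() is only disposed of once no thread
 * can still be observing an epoch in which the object was reachable.
 */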

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.2 2021/08/14 16:14:58 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )
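
/*
 * With EPOCH_MASK == 4 the epoch values cycle 0 -> 1 -> 2 -> 3 -> 0, so
 * e.g. EPOCH_PREV(0) == 3 and EPOCH_NEXT(3) == 0.
 */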

struct pending_ref {
    void *object;
    dispose_cb *dispose;
    struct pending_ref *next;
};

ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;
static uintptr_t epoch_threads[EPOCH_MASK];
static struct pending_ref *references[EPOCH_MASK];
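
/*
 * epoch_threads[e] counts the threads currently observing epoch e, while
 * references[e] collects the objects retired while e was the current epoch,
 * waiting to be disposed of once e can no longer be observed.
 */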

void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

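/*
 * Register the calling thread as an observer of the current global epoch and
 * return the epoch it ends up observing; the caller must later pass that
 * value to epoch_leave(). If the previous epoch is no longer observed by
 * anyone, epoch_join() also advances the global epoch and disposes of the
 * objects retired during the previous one.
 */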
epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The references we claimed above are now safe to free outside of any
     * epoch, and we don't want to make the current epoch last any longer
     * than necessary.
     *
     * There might be fairness issues in massively parallel environments,
     * but none have been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}

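/*
 * Leave an epoch previously entered with epoch_join(). When the last
 * observer of an epoch leaves and no thread is left observing a
 * neighbouring epoch either, the objects retired up to that point are
 * disposed of right away rather than waiting for a future epoch_join().
 */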
void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there is no telling how soon (or late) that might happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join() until we've released epoch_mutex, since it holds
     * that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) ) {
        /* Epoch counter has run full circle */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    } else if ( epoch == current_epoch ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (older) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange( &references[EPOCH_PREV(epoch)], &old_refs,
                &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[epoch], &current_refs, &current_refs,
                __ATOMIC_ACQ_REL );
    } else if ( epoch == EPOCH_PREV(current_epoch) ) {
        if ( __atomic_load_n(
                     &epoch_threads[EPOCH_NEXT(epoch)], __ATOMIC_RELAXED ) ) {
            /* There is another (newer) thread still running */
            ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
            return;
        }

        /* We're all alone, it's safe to claim all references and free them. */
        __atomic_exchange(
                &references[epoch], &old_refs, &old_refs, __ATOMIC_ACQ_REL );
        __atomic_exchange( &references[EPOCH_NEXT(epoch)], &current_refs,
                &current_refs, __ATOMIC_ACQ_REL );
    }
    /*
     * Else the current_epoch has moved far enough that no references remain to
     * be freed.
     */
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /*
     * Trigger a read fence that is not tied to any particular memory
     * location, to make sure we're reading the state after all threads
     * actually finished - which might have happened after we acquired
     * epoch_mutex, so ldap_pvt_thread_rdwr_rlock would not catch everything.
     *
     * TODO: confirm the following:
     * It might be that the tests and exchanges above only enforce a fence for
     * the locations affected, so we could still read stale memory for
     * unrelated locations? At least that's the only explanation I've been able
     * to establish for repeated crashes that seem to have gone away with this
     * in place.
     *
     * But then that's contrary to the second example in the Acquire/Release
     * section here:
     * https://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync
     */
    __atomic_thread_fence( __ATOMIC_ACQUIRE );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}
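
/*
 * Illustrative sketch of retiring an object (the conn object and the
 * my_conn_dispose callback are hypothetical, not defined in this file):
 *
 *     static void
 *     my_conn_dispose( void *arg )
 *     {
 *         ch_free( arg );
 *     }
 *
 *     epoch_append( conn, my_conn_dispose );
 *
 * Note that the compare-exchange loop above is a lock-free push onto the
 * per-epoch list, so epoch_append() itself never blocks.
 */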

int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we incremented the refcnt first and only checked for zero
     * afterwards, another thread might falsely believe the object was going
     * to stick around.
     *
     * Checking whether the object is still dead at disposal time might not
     * be able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

int
try_release_ref( uintptr_t *refp, void *object, dispose_cb *cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        epoch_append( object, cb );
    }

    return refcnt;
}
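
/*
 * Illustrative sketch of the reference-counting pattern (obj, lookup() and
 * obj_destroy are hypothetical, not defined in this file): acquire_ref() and
 * try_release_ref() operate on a refcnt field embedded in a managed object,
 * with the lookup done under an active epoch so the object cannot be freed
 * between finding it and taking a reference. acquire_ref() returns the
 * pre-increment count, so zero means the object is already being torn down
 * and was not acquired:
 *
 *     epoch_t epoch = epoch_join();
 *     obj = lookup( ... );
 *     if ( obj && !acquire_ref( &obj->refcnt ) ) {
 *         obj = NULL;
 *     }
 *     epoch_leave( epoch );
 *
 *     if ( obj ) {
 *         ... use obj ...
 *         try_release_ref( &obj->refcnt, obj, obj_destroy );
 *     }
 *
 * When the count drops to zero, try_release_ref() hands the object to
 * epoch_append() above, so it is only destroyed once no epoch that could
 * still see it remains active.
 */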
345