/*
 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * guard.cc: Functions for thread-safe static initialisation.
 *
 * Static values in C++ can be initialised lazily on their first use.  This
 * file contains functions that are used to ensure that two threads
 * attempting to initialise the same static do not call the constructor
 * twice.  This is important because constructors can have side effects, so
 * calling the constructor twice may be very bad.
 *
 * Statics that require initialisation are protected by a guard variable: a
 * 32-bit value on ARM, a 64-bit value elsewhere.  Any platform that can do
 * 32-bit atomic test-and-set operations can use this value as a low-overhead
 * lock.  Because statics (in most sane code) are accessed far more often
 * than they are initialised, this lock implementation is heavily optimised
 * towards the case where the static has already been initialised.
 */
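/*
 * A rough sketch of how these functions are used.  For a function-local
 * `static Foo f;`, the compiler conceptually emits the following (the
 * fast-path test of the guard is normally inlined, so this file is only
 * entered on the slow path):
 *
 *	static guard_t guard;              // zero-initialised
 *	if (__cxa_guard_acquire(&guard))   // 1 means: we must construct
 *	{
 *		try
 *		{
 *			// run the constructor of f
 *			__cxa_guard_release(&guard);
 *		}
 *		catch (...)
 *		{
 *			// unlock, leave marked uninitialised
 *			__cxa_guard_abort(&guard);
 *			throw;
 *		}
 *	}
 */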
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sched.h>
#include <pthread.h>
#include <assert.h>
#include "atomic.h"

// Older GCC doesn't define __LITTLE_ENDIAN__
#ifndef __LITTLE_ENDIAN__
// If __BYTE_ORDER__ is defined, use that instead
#	ifdef __BYTE_ORDER__
#		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#			define __LITTLE_ENDIAN__
#		endif
// x86 and ARM are the most common little-endian CPUs, so let's have a
// special case for them (ARM is already special cased).  Assume everything
// else is big endian.
#	elif defined(__x86_64) || defined(__i386)
#		define __LITTLE_ENDIAN__
#	endif
#endif


/*
 * The least significant bit of the guard variable indicates that the object
 * has been initialised, the most significant bit is used for a spinlock.
 *
 * The Itanium C++ ABI specifies that the compiler may test only the first
 * (lowest-addressed) byte of the guard variable on the fast path, so the
 * initialised flag must always land in that byte.  This is why the
 * constants below depend on the target's endianness.
 */
#ifdef __arm__
// ARM ABI - 32-bit guards.
typedef uint32_t guard_t;
typedef uint32_t guard_lock_t;
static const uint32_t LOCKED = static_cast<guard_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#elif defined(_LP64)
typedef uint64_t guard_t;
typedef uint64_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
static const guard_t LOCKED = static_cast<guard_t>(1) << 63;
static const guard_t INITIALISED = 1;
#	else
static const guard_t LOCKED = 1;
static const guard_t INITIALISED = static_cast<guard_t>(1) << 56;
#	endif
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#else
typedef uint32_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#	else
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
static const uint32_t LOCKED = 1;
static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
#	endif
#define LOCK_PART(guard) (&(guard)->lock_half)
#define INIT_PART(guard) (&(guard)->init_half)
#endif
static const guard_lock_t INITIAL = 0;
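/*
 * For reference, the configurations above yield the following guard
 * layouts ("byte 0" is the lowest-addressed byte):
 *
 *	ARM:           one 32-bit word; init flag in bit 0, lock in bit 31.
 *	LP64, LE:      one 64-bit word; init flag in bit 0 (byte 0), lock
 *	               in bit 63.
 *	LP64, BE:      one 64-bit word; init flag in bit 56 (byte 0), lock
 *	               in bit 0.
 *	Other 32-bit:  two 32-bit words; the init flag sits in byte 0 of
 *	               init_half, lock_half is a separate spinlock word.
 */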

/**
 * Acquires a lock on a guard, returning 0 if the object has already been
 * initialised, and 1 if it has not.  If the object is already constructed
 * then this function just needs to read a byte from memory and return.
 */
extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
{
	guard_lock_t old;
	// Not an atomic read, doesn't establish a happens-before
	// relationship, but if one is already established and we end up
	// seeing an initialised state then it's a fast path, otherwise we'll
	// do something more expensive than this test anyway...
	if (INITIALISED == *INIT_PART(guard_object))
		return 0;
	// Spin trying to do the initialisation
	for (;;)
	{
		// Loop trying to move the value of the guard from 0 (not
		// locked, not initialised) to the locked-uninitialised
		// position.
		old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
		    INITIAL, LOCKED);
		if (old == INITIAL) {
			// Lock obtained.  If lock and init bit are
			// in separate words, check for init race.
			if (INIT_PART(guard_object) == LOCK_PART(guard_object))
				return 1;
			if (INITIALISED != *INIT_PART(guard_object))
				return 1;

			// No need for a memory barrier here,
			// see first comment.
			*LOCK_PART(guard_object) = INITIAL;
			return 0;
		}
		// If lock and init bit are in the same word, check again
		// if we are done.
		if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
		    old == INITIALISED)
			return 0;

		assert(old == LOCKED);
		// Another thread holds the lock.
		// If lock and init bit are in different words, check
		// if we are done before yielding and looping.
		if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
		    INITIALISED == *INIT_PART(guard_object))
			return 0;
		sched_yield();
	}
}

/**
 * Releases the lock without marking the object as initialised.  This
 * function is called if initialising a static causes an exception to be
 * thrown.
 */
extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
{
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
	    LOCKED, INITIAL);
	assert(reset);
}
/**
 * Releases the guard and marks the object as initialised.  This function is
 * called after successful initialisation of a static.
 */
extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
{
	guard_lock_t old;
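	// When the lock and the init flag share one word (ARM and LP64),
	// the CAS below moves the guard directly from locked-uninitialised
	// (LOCKED) to initialised-and-unlocked.  When they live in separate
	// words, the init word moves from INITIAL to INITIALISED and the
	// lock word is cleared separately afterwards.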
	if (INIT_PART(guard_object) == LOCK_PART(guard_object))
		old = LOCKED;
	else
		old = INITIAL;
	__attribute__((unused))
	bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
	    old, INITIALISED);
	assert(reset);
	if (INIT_PART(guard_object) != LOCK_PART(guard_object))
		*LOCK_PART(guard_object) = INITIAL;
}