====================
``<atomic>`` Design
====================

There were originally 3 designs under consideration. They differ in where most
of the implementation work is done. The functionality exposed to the customer
should be identical (and conforming) for all three designs.


Design A: Minimal work for the library
======================================
The compiler supplies all of the intrinsics as described below. This list of
intrinsics roughly parallels the requirements of the C and C++ atomics proposals.
The C and C++ library implementations simply drop through to these intrinsics.
For anything the platform does not support in hardware, the compiler arranges
for a (compiler-rt) library call to be made; this call does the job with a
mutex, ignoring the memory ordering parameter (effectively implementing
``memory_order_seq_cst``).

Ultimate efficiency is preferred over run time error checking. Undefined
behavior is acceptable when the inputs do not conform as defined below.

.. code-block:: cpp

    // In every intrinsic signature below, type* atomic_obj may be a pointer to a
    // volatile-qualified type. Memory ordering values map to the following meanings:
    //  memory_order_relaxed == 0
    //  memory_order_consume == 1
    //  memory_order_acquire == 2
    //  memory_order_release == 3
    //  memory_order_acq_rel == 4
    //  memory_order_seq_cst == 5

    // type must be trivially copyable
    // type represents a "type argument"
    bool __atomic_is_lock_free(type);

    // type must be trivially copyable
    // Behavior is defined for mem_ord = 0, 1, 2, 5
    type __atomic_load(const type* atomic_obj, int mem_ord);

    // type must be trivially copyable
    // Behavior is defined for mem_ord = 0, 3, 5
    void __atomic_store(type* atomic_obj, type desired, int mem_ord);

    // type must be trivially copyable
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_exchange(type* atomic_obj, type desired, int mem_ord);

    // type must be trivially copyable
    // Behavior is defined for mem_success = [0 ... 5],
    //   mem_failure <= mem_success
    //   mem_failure != 3
    //   mem_failure != 4
    bool __atomic_compare_exchange_strong(type* atomic_obj,
                                          type* expected, type desired,
                                          int mem_success, int mem_failure);

    // type must be trivially copyable
    // Behavior is defined for mem_success = [0 ... 5],
    //   mem_failure <= mem_success
    //   mem_failure != 3
    //   mem_failure != 4
    bool __atomic_compare_exchange_weak(type* atomic_obj,
                                        type* expected, type desired,
                                        int mem_success, int mem_failure);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_fetch_add(type* atomic_obj, type operand, int mem_ord);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_fetch_sub(type* atomic_obj, type operand, int mem_ord);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_fetch_and(type* atomic_obj, type operand, int mem_ord);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_fetch_or(type* atomic_obj, type operand, int mem_ord);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    // Behavior is defined for mem_ord = [0 ... 5]
    type __atomic_fetch_xor(type* atomic_obj, type operand, int mem_ord);

    // Behavior is defined for mem_ord = [0 ... 5]
    void* __atomic_fetch_add(void** atomic_obj, ptrdiff_t operand, int mem_ord);
    void* __atomic_fetch_sub(void** atomic_obj, ptrdiff_t operand, int mem_ord);

    // Behavior is defined for mem_ord = [0 ... 5]
    void __atomic_thread_fence(int mem_ord);
    void __atomic_signal_fence(int mem_ord);
If desired, the intrinsics taking a single ``mem_ord`` parameter can default
this argument to 5.
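
For instance, such a default could be expressed directly in the declaration (a
sketch only; whether and how to default the parameter is up to the
implementation):

.. code-block:: cpp

    // Hypothetical defaulted form; 5 == memory_order_seq_cst
    type __atomic_load(const type* atomic_obj, int mem_ord = 5);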

If desired, the intrinsics taking two ordering parameters can default ``mem_success``
to 5, and ``mem_failure`` to ``translate_memory_order(mem_success)``, where
``translate_memory_order`` is defined as:

.. code-block:: cpp

    int translate_memory_order(int o) {
        switch (o) {
        case 4:
            return 2;
        case 3:
            return 0;
        }
        return o;
    }
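
For example, a defaulted compare-exchange with ``mem_success == 4`` (``acq_rel``)
gets ``mem_failure == 2`` (``acquire``), and one with ``mem_success == 3``
(``release``) gets ``mem_failure == 0`` (``relaxed``); every other ordering is
passed through unchanged.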

Below are representative C++ implementations of all of the operations. Their
purpose is to document the desired semantics of each operation, assuming
``memory_order_seq_cst``. This is essentially the code that will be called
if the front end calls out to compiler-rt.

.. code-block:: cpp

    template <class T>
    T __atomic_load(T const volatile* obj) {
        unique_lock<mutex> _(some_mutex);
        return *obj;
    }

    template <class T>
    void __atomic_store(T volatile* obj, T desr) {
        unique_lock<mutex> _(some_mutex);
        *obj = desr;
    }

    template <class T>
    T __atomic_exchange(T volatile* obj, T desr) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj = desr;
        return r;
    }

    template <class T>
    bool __atomic_compare_exchange_strong(T volatile* obj, T* exp, T desr) {
        unique_lock<mutex> _(some_mutex);
        if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) // if (*obj == *exp)
        {
            std::memcpy(const_cast<T*>(obj), &desr, sizeof(T)); // *obj = desr;
            return true;
        }
        std::memcpy(exp, const_cast<T*>(obj), sizeof(T)); // *exp = *obj;
        return false;
    }

    // May spuriously return false (even if *obj == *exp)
    template <class T>
    bool __atomic_compare_exchange_weak(T volatile* obj, T* exp, T desr) {
        unique_lock<mutex> _(some_mutex);
        if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) // if (*obj == *exp)
        {
            std::memcpy(const_cast<T*>(obj), &desr, sizeof(T)); // *obj = desr;
            return true;
        }
        std::memcpy(exp, const_cast<T*>(obj), sizeof(T)); // *exp = *obj;
        return false;
    }

    template <class T>
    T __atomic_fetch_add(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj += operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_sub(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj -= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_and(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj &= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_or(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj |= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_xor(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj ^= operand;
        return r;
    }

    void* __atomic_fetch_add(void* volatile* obj, ptrdiff_t operand) {
        unique_lock<mutex> _(some_mutex);
        void* r = *obj;
        (char*&)(*obj) += operand;
        return r;
    }

    void* __atomic_fetch_sub(void* volatile* obj, ptrdiff_t operand) {
        unique_lock<mutex> _(some_mutex);
        void* r = *obj;
        (char*&)(*obj) -= operand;
        return r;
    }

    void __atomic_thread_fence() {
        unique_lock<mutex> _(some_mutex);
    }

    void __atomic_signal_fence() {
        unique_lock<mutex> _(some_mutex);
    }


Design B: Something in between
==============================
This is a variation of design A which puts the burden on the library to arrange
for the correct manipulation of the run-time memory ordering arguments, and only
calls the compiler for well-defined memory orderings. I think of this design as
the worst of A and C, instead of the best of A and C, but I offer it as an
option in the spirit of completeness. A sketch of the run-time dispatch the
library would have to perform under this design appears after the list of
intrinsics below.

.. code-block:: cpp

    // type must be trivially copyable
    bool __atomic_is_lock_free(const type* atomic_obj);

    // type must be trivially copyable
    type __atomic_load_relaxed(const volatile type* atomic_obj);
    type __atomic_load_consume(const volatile type* atomic_obj);
    type __atomic_load_acquire(const volatile type* atomic_obj);
    type __atomic_load_seq_cst(const volatile type* atomic_obj);

    // type must be trivially copyable
    void __atomic_store_relaxed(volatile type* atomic_obj, type desired);
    void __atomic_store_release(volatile type* atomic_obj, type desired);
    void __atomic_store_seq_cst(volatile type* atomic_obj, type desired);

    // type must be trivially copyable
    type __atomic_exchange_relaxed(volatile type* atomic_obj, type desired);
    type __atomic_exchange_consume(volatile type* atomic_obj, type desired);
    type __atomic_exchange_acquire(volatile type* atomic_obj, type desired);
    type __atomic_exchange_release(volatile type* atomic_obj, type desired);
    type __atomic_exchange_acq_rel(volatile type* atomic_obj, type desired);
    type __atomic_exchange_seq_cst(volatile type* atomic_obj, type desired);

    // type must be trivially copyable
    bool __atomic_compare_exchange_strong_relaxed_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_consume_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_consume_consume(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acquire_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acquire_consume(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acquire_acquire(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_release_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_release_consume(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_release_acquire(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acq_rel_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acq_rel_consume(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_acq_rel_acquire(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_seq_cst_relaxed(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_seq_cst_consume(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_seq_cst_acquire(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);
    bool __atomic_compare_exchange_strong_seq_cst_seq_cst(volatile type* atomic_obj,
                                                          type* expected,
                                                          type desired);

    // type must be trivially copyable
    bool __atomic_compare_exchange_weak_relaxed_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_consume_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_consume_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acquire_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acquire_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acquire_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_release_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_release_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_release_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acq_rel_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acq_rel_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_acq_rel_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_seq_cst_relaxed(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_seq_cst_consume(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_seq_cst_acquire(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);
    bool __atomic_compare_exchange_weak_seq_cst_seq_cst(volatile type* atomic_obj,
                                                        type* expected,
                                                        type desired);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    type __atomic_fetch_add_relaxed(volatile type* atomic_obj, type operand);
    type __atomic_fetch_add_consume(volatile type* atomic_obj, type operand);
    type __atomic_fetch_add_acquire(volatile type* atomic_obj, type operand);
    type __atomic_fetch_add_release(volatile type* atomic_obj, type operand);
    type __atomic_fetch_add_acq_rel(volatile type* atomic_obj, type operand);
    type __atomic_fetch_add_seq_cst(volatile type* atomic_obj, type operand);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    type __atomic_fetch_sub_relaxed(volatile type* atomic_obj, type operand);
    type __atomic_fetch_sub_consume(volatile type* atomic_obj, type operand);
    type __atomic_fetch_sub_acquire(volatile type* atomic_obj, type operand);
    type __atomic_fetch_sub_release(volatile type* atomic_obj, type operand);
    type __atomic_fetch_sub_acq_rel(volatile type* atomic_obj, type operand);
    type __atomic_fetch_sub_seq_cst(volatile type* atomic_obj, type operand);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    type __atomic_fetch_and_relaxed(volatile type* atomic_obj, type operand);
    type __atomic_fetch_and_consume(volatile type* atomic_obj, type operand);
    type __atomic_fetch_and_acquire(volatile type* atomic_obj, type operand);
    type __atomic_fetch_and_release(volatile type* atomic_obj, type operand);
    type __atomic_fetch_and_acq_rel(volatile type* atomic_obj, type operand);
    type __atomic_fetch_and_seq_cst(volatile type* atomic_obj, type operand);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    type __atomic_fetch_or_relaxed(volatile type* atomic_obj, type operand);
    type __atomic_fetch_or_consume(volatile type* atomic_obj, type operand);
    type __atomic_fetch_or_acquire(volatile type* atomic_obj, type operand);
    type __atomic_fetch_or_release(volatile type* atomic_obj, type operand);
    type __atomic_fetch_or_acq_rel(volatile type* atomic_obj, type operand);
    type __atomic_fetch_or_seq_cst(volatile type* atomic_obj, type operand);

    // type is one of: char, signed char, unsigned char, short, unsigned short, int,
    //      unsigned int, long, unsigned long, long long, unsigned long long,
    //      char16_t, char32_t, wchar_t
    type __atomic_fetch_xor_relaxed(volatile type* atomic_obj, type operand);
    type __atomic_fetch_xor_consume(volatile type* atomic_obj, type operand);
    type __atomic_fetch_xor_acquire(volatile type* atomic_obj, type operand);
    type __atomic_fetch_xor_release(volatile type* atomic_obj, type operand);
    type __atomic_fetch_xor_acq_rel(volatile type* atomic_obj, type operand);
    type __atomic_fetch_xor_seq_cst(volatile type* atomic_obj, type operand);

    void* __atomic_fetch_add_relaxed(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_add_consume(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_add_acquire(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_add_release(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_add_acq_rel(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_add_seq_cst(void* volatile* atomic_obj, ptrdiff_t operand);

    void* __atomic_fetch_sub_relaxed(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_sub_consume(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_sub_acquire(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_sub_release(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_sub_acq_rel(void* volatile* atomic_obj, ptrdiff_t operand);
    void* __atomic_fetch_sub_seq_cst(void* volatile* atomic_obj, ptrdiff_t operand);

    void __atomic_thread_fence_relaxed();
    void __atomic_thread_fence_consume();
    void __atomic_thread_fence_acquire();
    void __atomic_thread_fence_release();
    void __atomic_thread_fence_acq_rel();
    void __atomic_thread_fence_seq_cst();

    void __atomic_signal_fence_relaxed();
    void __atomic_signal_fence_consume();
    void __atomic_signal_fence_acquire();
    void __atomic_signal_fence_release();
    void __atomic_signal_fence_acq_rel();
    void __atomic_signal_fence_seq_cst();

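Under this design, the library's job is a run-time-to-compile-time dispatch onto
these suffixed intrinsics. A minimal sketch of what that could look like for
``load`` (the helper name ``__libcpp_atomic_load`` and the ``switch`` structure
are illustrative assumptions, not part of the design):

.. code-block:: cpp

    template <class T>
    T __libcpp_atomic_load(const volatile T* obj, int mem_ord) {
        switch (mem_ord) {
        case 0:  return __atomic_load_relaxed(obj);
        case 1:  return __atomic_load_consume(obj);
        case 2:  return __atomic_load_acquire(obj);
        default: return __atomic_load_seq_cst(obj); // strongest ordering is always valid
        }
    }
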
Design C: Minimal work for the front end
========================================
The ``<atomic>`` header is one of the headers most closely coupled to the compiler.
Ideally, invoking any function from ``<atomic>`` should result in highly
optimized assembly being inserted directly into your application -- assembly that
is not otherwise representable by higher level C or C++ expressions. The design of
the libc++ ``<atomic>`` header started with this goal in mind. A secondary, but
still very important, goal is that the compiler should have to do minimal work to
facilitate the implementation of ``<atomic>``. Without this second goal,
practically speaking, the libc++ ``<atomic>`` header would be doomed to be a
barely supported, second-class citizen on almost every platform.

Goals:

- Optimal code generation for atomic operations
- Minimal effort for the compiler to achieve goal 1 on any given platform
- Conformance to the C++0X draft standard

The purpose of this document is to inform compiler writers what they need to do
to enable a high performance libc++ ``<atomic>`` with minimal effort.

The minimal work that must be done for a conforming ``<atomic>``
----------------------------------------------------------------
The only "atomic" operations that must actually be lock free in
``<atomic>`` are represented by the following compiler intrinsics:

.. code-block:: cpp

    __atomic_flag__ __atomic_exchange_seq_cst(__atomic_flag__ volatile* obj, __atomic_flag__ desr) {
        unique_lock<mutex> _(some_mutex);
        __atomic_flag__ result = *obj;
        *obj = desr;
        return result;
    }

    void __atomic_store_seq_cst(__atomic_flag__ volatile* obj, __atomic_flag__ desr) {
        unique_lock<mutex> _(some_mutex);
        *obj = desr;
    }

Where:

- If ``__has_feature(__atomic_flag)`` evaluates to 1 in the preprocessor then
  the compiler must define ``__atomic_flag__`` (e.g. as a typedef to ``int``).
- If ``__has_feature(__atomic_flag)`` evaluates to 0 in the preprocessor then
  the library defines ``__atomic_flag__`` as a typedef to ``bool``.
- To communicate that the above intrinsics are available, the compiler must
  arrange for ``__has_feature`` to return 1 when fed the intrinsic name
  appended with an '_' and the mangled type name of ``__atomic_flag__``.

For example, if ``__atomic_flag__`` is ``unsigned int``:

.. code-block:: cpp

    // __has_feature(__atomic_flag) == 1
    // __has_feature(__atomic_exchange_seq_cst_j) == 1
    // __has_feature(__atomic_store_seq_cst_j) == 1

    typedef unsigned int __atomic_flag__;

    unsigned int __atomic_exchange_seq_cst(unsigned int volatile*, unsigned int) {
        // ...
    }

    void __atomic_store_seq_cst(unsigned int volatile*, unsigned int) {
        // ...
    }

That's it! Compiler writers do the above and you've got a fully conforming
(though sub-par performance) ``<atomic>`` header!


Recommended work for a higher performance ``<atomic>``
------------------------------------------------------
It would be good if the above intrinsics worked with all integral types plus
``void*``. Because this may not be possible to do in a lock-free manner for
all integral types on all platforms, a compiler must communicate each type that
an intrinsic works with. For example, if ``__atomic_exchange_seq_cst`` works
for all types except for ``long long`` and ``unsigned long long``, then:

.. code-block:: cpp

    __has_feature(__atomic_exchange_seq_cst_b) == 1  // bool
    __has_feature(__atomic_exchange_seq_cst_c) == 1  // char
    __has_feature(__atomic_exchange_seq_cst_a) == 1  // signed char
    __has_feature(__atomic_exchange_seq_cst_h) == 1  // unsigned char
    __has_feature(__atomic_exchange_seq_cst_Ds) == 1 // char16_t
    __has_feature(__atomic_exchange_seq_cst_Di) == 1 // char32_t
    __has_feature(__atomic_exchange_seq_cst_w) == 1  // wchar_t
    __has_feature(__atomic_exchange_seq_cst_s) == 1  // short
    __has_feature(__atomic_exchange_seq_cst_t) == 1  // unsigned short
    __has_feature(__atomic_exchange_seq_cst_i) == 1  // int
    __has_feature(__atomic_exchange_seq_cst_j) == 1  // unsigned int
    __has_feature(__atomic_exchange_seq_cst_l) == 1  // long
    __has_feature(__atomic_exchange_seq_cst_m) == 1  // unsigned long
    __has_feature(__atomic_exchange_seq_cst_Pv) == 1 // void*

Note that only the ``__has_feature`` flag is decorated with the argument
type. The name of the compiler intrinsic is not decorated, but instead works
like a C++ overloaded function.
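
In practice, the library can key each overload off the decorated feature flag.
A minimal sketch for ``unsigned int`` (the wrapper name ``__libcpp_exchange``
and the ``#if`` structure are illustrative assumptions):

.. code-block:: cpp

    unsigned int __libcpp_exchange(unsigned int volatile* obj, unsigned int desr) {
    #if __has_feature(__atomic_exchange_seq_cst_j)   // 'j' mangles unsigned int
        return __atomic_exchange_seq_cst(obj, desr); // compiler-supplied, lock-free
    #else
        unique_lock<mutex> _(some_mutex);            // library fallback under a lock
        unsigned int r = *obj;
        *obj = desr;
        return r;
    #endif
    }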

Additionally, there are other intrinsics besides ``__atomic_exchange_seq_cst``
and ``__atomic_store_seq_cst``. They are optional. But if the compiler can
generate faster code than provided by the library, then clients will benefit
from the compiler writer's expertise and knowledge of the targeted platform.

Below is the complete list of *sequentially consistent* intrinsics, and
their library implementations. Template syntax is used to indicate the desired
overloading for integral and ``void*`` types. The template does not represent a
requirement that the intrinsic operate on **any** type!

.. code-block:: cpp

    // T is one of:
    // bool, char, signed char, unsigned char, short, unsigned short,
    // int, unsigned int, long, unsigned long,
    // long long, unsigned long long, char16_t, char32_t, wchar_t, void*

    template <class T>
    T __atomic_load_seq_cst(T const volatile* obj) {
        unique_lock<mutex> _(some_mutex);
        return *obj;
    }

    template <class T>
    void __atomic_store_seq_cst(T volatile* obj, T desr) {
        unique_lock<mutex> _(some_mutex);
        *obj = desr;
    }

    template <class T>
    T __atomic_exchange_seq_cst(T volatile* obj, T desr) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj = desr;
        return r;
    }

    template <class T>
    bool __atomic_compare_exchange_strong_seq_cst_seq_cst(T volatile* obj, T* exp, T desr) {
        unique_lock<mutex> _(some_mutex);
        if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) {
            std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));
            return true;
        }
        std::memcpy(exp, const_cast<T*>(obj), sizeof(T));
        return false;
    }

    // May spuriously return false (even if *obj == *exp)
    template <class T>
    bool __atomic_compare_exchange_weak_seq_cst_seq_cst(T volatile* obj, T* exp, T desr) {
        unique_lock<mutex> _(some_mutex);
        if (std::memcmp(const_cast<T*>(obj), exp, sizeof(T)) == 0) {
            std::memcpy(const_cast<T*>(obj), &desr, sizeof(T));
            return true;
        }
        std::memcpy(exp, const_cast<T*>(obj), sizeof(T));
        return false;
    }

    // T is one of:
    // char, signed char, unsigned char, short, unsigned short,
    // int, unsigned int, long, unsigned long,
    // long long, unsigned long long, char16_t, char32_t, wchar_t

    template <class T>
    T __atomic_fetch_add_seq_cst(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj += operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_sub_seq_cst(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj -= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_and_seq_cst(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj &= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_or_seq_cst(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj |= operand;
        return r;
    }

    template <class T>
    T __atomic_fetch_xor_seq_cst(T volatile* obj, T operand) {
        unique_lock<mutex> _(some_mutex);
        T r = *obj;
        *obj ^= operand;
        return r;
    }

    void* __atomic_fetch_add_seq_cst(void* volatile* obj, ptrdiff_t operand) {
        unique_lock<mutex> _(some_mutex);
        void* r = *obj;
        (char*&)(*obj) += operand;
        return r;
    }

    void* __atomic_fetch_sub_seq_cst(void* volatile* obj, ptrdiff_t operand) {
        unique_lock<mutex> _(some_mutex);
        void* r = *obj;
        (char*&)(*obj) -= operand;
        return r;
    }

    void __atomic_thread_fence_seq_cst() {
        unique_lock<mutex> _(some_mutex);
    }

    void __atomic_signal_fence_seq_cst() {
        unique_lock<mutex> _(some_mutex);
    }

One should consult the (currently draft) `C++ Standard <https://wg21.link/n3126>`_
for the details of the definitions for these operations. For example,
``__atomic_compare_exchange_weak_seq_cst_seq_cst`` is allowed to fail
spuriously while ``__atomic_compare_exchange_strong_seq_cst_seq_cst`` is not.

If on your platform the lock-free definition of ``__atomic_compare_exchange_weak_seq_cst_seq_cst``
would be the same as ``__atomic_compare_exchange_strong_seq_cst_seq_cst``, you may omit the
``__atomic_compare_exchange_weak_seq_cst_seq_cst`` intrinsic without a performance cost. The
library will prefer your implementation of ``__atomic_compare_exchange_strong_seq_cst_seq_cst``
over its own definition for implementing ``__atomic_compare_exchange_weak_seq_cst_seq_cst``.
That is, the library will arrange for ``__atomic_compare_exchange_weak_seq_cst_seq_cst`` to call
``__atomic_compare_exchange_strong_seq_cst_seq_cst`` if you supply an intrinsic for the strong
version but not the weak.
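
A sketch of that arrangement for ``unsigned int`` (the ``#if`` structure shown
is an illustrative assumption, not the actual header code):

.. code-block:: cpp

    #if !__has_feature(__atomic_compare_exchange_weak_seq_cst_seq_cst_j) && \
         __has_feature(__atomic_compare_exchange_strong_seq_cst_seq_cst_j)
    inline bool __atomic_compare_exchange_weak_seq_cst_seq_cst(unsigned int volatile* obj,
                                                               unsigned int* exp,
                                                               unsigned int desr) {
        // The strong form never fails spuriously, which trivially satisfies the
        // weak contract (weak is merely *allowed* to fail spuriously).
        return __atomic_compare_exchange_strong_seq_cst_seq_cst(obj, exp, desr);
    }
    #endif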

Taking advantage of weaker memory synchronization
-------------------------------------------------
So far, all of the intrinsics presented require a **sequentially consistent** memory ordering.
That is, no loads or stores can move across the operation (just as if the library had locked
that internal mutex). But ``<atomic>`` supports weaker memory ordering operations. In all,
there are six memory orderings (listed here from strongest to weakest):

.. code-block:: cpp

    memory_order_seq_cst
    memory_order_acq_rel
    memory_order_release
    memory_order_acquire
    memory_order_consume
    memory_order_relaxed

(See the `C++ Standard <https://wg21.link/n3126>`_ for the detailed definitions of each of these orderings).

On some platforms, the compiler vendor can offer some or even all of the above
intrinsics at one or more weaker levels of memory synchronization. This might,
for example, allow the compiler to avoid issuing an ``mfence`` instruction on x86.

If the compiler does not offer any given operation, at any given memory ordering
level, the library will automatically attempt to call the next highest memory
ordering operation. This continues up to ``seq_cst``, and if that doesn't
exist, then the library takes over and does the job with a ``mutex``. This
is a compile-time search and selection operation. At run time, the application
will only see the few inlined assembly instructions for the selected intrinsic.
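
For example, an ``acquire`` load falls back to the ``seq_cst`` load (the next
stronger ordering that ``load`` supports), and from there to the mutex. A
minimal sketch for ``unsigned int`` (the wrapper name and ``#if`` chain are
illustrative assumptions):

.. code-block:: cpp

    inline unsigned int __libcpp_load_acquire(unsigned int const volatile* obj) {
    #if __has_feature(__atomic_load_acquire_j)
        return __atomic_load_acquire(obj);  // exact ordering available
    #elif __has_feature(__atomic_load_seq_cst_j)
        return __atomic_load_seq_cst(obj);  // next strongest available ordering
    #else
        unique_lock<mutex> _(some_mutex);   // final fallback: the library mutex
        return *obj;
    #endif
    }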

Each intrinsic is appended with the 7-letter name of the memory ordering it
addresses. For example, a ``load`` with ``relaxed`` ordering is defined by:

.. code-block:: cpp

    T __atomic_load_relaxed(const volatile T* obj);

And announced with:

.. code-block:: cpp

    __has_feature(__atomic_load_relaxed_b) == 1  // bool
    __has_feature(__atomic_load_relaxed_c) == 1  // char
    __has_feature(__atomic_load_relaxed_a) == 1  // signed char
    ...

The ``__atomic_compare_exchange_strong(weak)`` intrinsics are parameterized
on two memory orderings. The first ordering applies when the operation returns
``true`` and the second ordering applies when the operation returns ``false``.

Not every memory ordering is appropriate for every operation. ``exchange``
and the ``fetch_XXX`` operations support all 6. But ``load`` only supports
``relaxed``, ``consume``, ``acquire`` and ``seq_cst``. ``store`` only supports
``relaxed``, ``release``, and ``seq_cst``. The ``compare_exchange`` operations
support the following 16 combinations out of the possible 36:

.. code-block:: cpp

    relaxed_relaxed
    consume_relaxed
    consume_consume
    acquire_relaxed
    acquire_consume
    acquire_acquire
    release_relaxed
    release_consume
    release_acquire
    acq_rel_relaxed
    acq_rel_consume
    acq_rel_acquire
    seq_cst_relaxed
    seq_cst_consume
    seq_cst_acquire
    seq_cst_seq_cst

Again, the compiler supplies intrinsics only for the strongest orderings where
it can make a difference. The library takes care of calling the weakest
supplied intrinsic that is as strong or stronger than the customer asked for.

Note about ABI
==============
With any design, the (back-end) compiler writer should note that the decision to
implement lock-free operations on any given type (or not) is an ABI-binding decision.
One cannot change from treating a type as not lock-free to lock-free (or vice versa)
without breaking the ABI.

For example:

**TU1.cpp**:

.. code-block:: cpp

    extern atomic<long long> A;
    int foo() { return A.compare_exchange_strong(w, x); }


**TU2.cpp**:

.. code-block:: cpp

    extern atomic<long long> A;
    bool bar() { return A.compare_exchange_strong(y, z); }

If only **one** of these calls to ``compare_exchange_strong`` is implemented with
mutex-locked code, then that mutex-locked code will not be executed mutually
exclusively of the one implemented in a lock-free manner.
798