/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
    s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

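As a sketch, a compiler that chose not to inline the increment above could
instead emit a call like this (the `loc` and `gtid` arguments here are
illustrative placeholders for whatever source location and thread id the
compiler has at hand):
@code
// hypothetical outlined form of "#pragma omp atomic s++;"
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode
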
The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
long double (80 bit 8087/x87 float) | `float10`
_Quad (128 bit float) | `float16`
complex<float>   |  `cmplx4`
complex<double>  | `cmplx8`
complex<long double> | `cmplx10`
complex<_Quad>   | `cmplx16`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^  | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv.   | eqv
.neqv.  | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

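As an example of how the pieces compose, here are three generated names with
their (non-atomic) equivalents sketched alongside:
@code
__kmpc_atomic_float8_sub      // *lhs = *lhs - rhs
__kmpc_atomic_float8_sub_rev  // *lhs = rhs - *lhs  (reversed)
__kmpc_atomic_float8_sub_cpt  // as _sub, but also returns the old or new value
@endcode
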
Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param id_ref  a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument that determines which of the two is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param id_ref  a pointer to the source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand
@param flag  one if the result is to be captured *after* the operation, zero if
captured *before*.

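Sketched as non-atomic pseudo-code, the capture behaviour is
@code
TYPE old = *lhs;
*lhs = old <op> rhs;
return flag ? *lhs : old;
@endcode
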
The one exception to this is the `complex<float>` type, where the result is not
returned by value; instead, a pointer to the result is passed as an extra
argument.

These functions look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
    __kmpc_atomic_fixed1_add
    __kmpc_atomic_fixed1_add_cpt
    __kmpc_atomic_fixed1_add_fp
    __kmpc_atomic_fixed1_andb
    __kmpc_atomic_fixed1_andb_cpt
    __kmpc_atomic_fixed1_andl
    __kmpc_atomic_fixed1_andl_cpt
    __kmpc_atomic_fixed1_div
    __kmpc_atomic_fixed1_div_cpt
    __kmpc_atomic_fixed1_div_cpt_rev
    __kmpc_atomic_fixed1_div_float8
    __kmpc_atomic_fixed1_div_fp
    __kmpc_atomic_fixed1_div_rev
    __kmpc_atomic_fixed1_eqv
    __kmpc_atomic_fixed1_eqv_cpt
    __kmpc_atomic_fixed1_max
    __kmpc_atomic_fixed1_max_cpt
    __kmpc_atomic_fixed1_min
    __kmpc_atomic_fixed1_min_cpt
    __kmpc_atomic_fixed1_mul
    __kmpc_atomic_fixed1_mul_cpt
    __kmpc_atomic_fixed1_mul_float8
    __kmpc_atomic_fixed1_mul_fp
    __kmpc_atomic_fixed1_neqv
    __kmpc_atomic_fixed1_neqv_cpt
    __kmpc_atomic_fixed1_orb
    __kmpc_atomic_fixed1_orb_cpt
    __kmpc_atomic_fixed1_orl
    __kmpc_atomic_fixed1_orl_cpt
    __kmpc_atomic_fixed1_rd
    __kmpc_atomic_fixed1_shl
    __kmpc_atomic_fixed1_shl_cpt
    __kmpc_atomic_fixed1_shl_cpt_rev
    __kmpc_atomic_fixed1_shl_rev
    __kmpc_atomic_fixed1_shr
    __kmpc_atomic_fixed1_shr_cpt
    __kmpc_atomic_fixed1_shr_cpt_rev
    __kmpc_atomic_fixed1_shr_rev
    __kmpc_atomic_fixed1_sub
    __kmpc_atomic_fixed1_sub_cpt
    __kmpc_atomic_fixed1_sub_cpt_rev
    __kmpc_atomic_fixed1_sub_fp
    __kmpc_atomic_fixed1_sub_rev
    __kmpc_atomic_fixed1_swp
    __kmpc_atomic_fixed1_wr
    __kmpc_atomic_fixed1_xor
    __kmpc_atomic_fixed1_xor_cpt
    __kmpc_atomic_fixed1u_add_fp
    __kmpc_atomic_fixed1u_sub_fp
    __kmpc_atomic_fixed1u_mul_fp
    __kmpc_atomic_fixed1u_div
    __kmpc_atomic_fixed1u_div_cpt
    __kmpc_atomic_fixed1u_div_cpt_rev
    __kmpc_atomic_fixed1u_div_fp
    __kmpc_atomic_fixed1u_div_rev
    __kmpc_atomic_fixed1u_shr
    __kmpc_atomic_fixed1u_shr_cpt
    __kmpc_atomic_fixed1u_shr_cpt_rev
    __kmpc_atomic_fixed1u_shr_rev
    __kmpc_atomic_fixed2_add
    __kmpc_atomic_fixed2_add_cpt
    __kmpc_atomic_fixed2_add_fp
    __kmpc_atomic_fixed2_andb
    __kmpc_atomic_fixed2_andb_cpt
    __kmpc_atomic_fixed2_andl
    __kmpc_atomic_fixed2_andl_cpt
    __kmpc_atomic_fixed2_div
    __kmpc_atomic_fixed2_div_cpt
    __kmpc_atomic_fixed2_div_cpt_rev
    __kmpc_atomic_fixed2_div_float8
    __kmpc_atomic_fixed2_div_fp
    __kmpc_atomic_fixed2_div_rev
    __kmpc_atomic_fixed2_eqv
    __kmpc_atomic_fixed2_eqv_cpt
    __kmpc_atomic_fixed2_max
    __kmpc_atomic_fixed2_max_cpt
    __kmpc_atomic_fixed2_min
    __kmpc_atomic_fixed2_min_cpt
    __kmpc_atomic_fixed2_mul
    __kmpc_atomic_fixed2_mul_cpt
    __kmpc_atomic_fixed2_mul_float8
    __kmpc_atomic_fixed2_mul_fp
    __kmpc_atomic_fixed2_neqv
    __kmpc_atomic_fixed2_neqv_cpt
    __kmpc_atomic_fixed2_orb
    __kmpc_atomic_fixed2_orb_cpt
    __kmpc_atomic_fixed2_orl
    __kmpc_atomic_fixed2_orl_cpt
    __kmpc_atomic_fixed2_rd
    __kmpc_atomic_fixed2_shl
    __kmpc_atomic_fixed2_shl_cpt
    __kmpc_atomic_fixed2_shl_cpt_rev
    __kmpc_atomic_fixed2_shl_rev
    __kmpc_atomic_fixed2_shr
    __kmpc_atomic_fixed2_shr_cpt
    __kmpc_atomic_fixed2_shr_cpt_rev
    __kmpc_atomic_fixed2_shr_rev
    __kmpc_atomic_fixed2_sub
    __kmpc_atomic_fixed2_sub_cpt
    __kmpc_atomic_fixed2_sub_cpt_rev
    __kmpc_atomic_fixed2_sub_fp
    __kmpc_atomic_fixed2_sub_rev
    __kmpc_atomic_fixed2_swp
    __kmpc_atomic_fixed2_wr
    __kmpc_atomic_fixed2_xor
    __kmpc_atomic_fixed2_xor_cpt
    __kmpc_atomic_fixed2u_add_fp
    __kmpc_atomic_fixed2u_sub_fp
    __kmpc_atomic_fixed2u_mul_fp
    __kmpc_atomic_fixed2u_div
    __kmpc_atomic_fixed2u_div_cpt
    __kmpc_atomic_fixed2u_div_cpt_rev
    __kmpc_atomic_fixed2u_div_fp
    __kmpc_atomic_fixed2u_div_rev
    __kmpc_atomic_fixed2u_shr
    __kmpc_atomic_fixed2u_shr_cpt
    __kmpc_atomic_fixed2u_shr_cpt_rev
    __kmpc_atomic_fixed2u_shr_rev
    __kmpc_atomic_fixed4_add
    __kmpc_atomic_fixed4_add_cpt
    __kmpc_atomic_fixed4_add_fp
    __kmpc_atomic_fixed4_andb
    __kmpc_atomic_fixed4_andb_cpt
    __kmpc_atomic_fixed4_andl
    __kmpc_atomic_fixed4_andl_cpt
    __kmpc_atomic_fixed4_div
    __kmpc_atomic_fixed4_div_cpt
    __kmpc_atomic_fixed4_div_cpt_rev
    __kmpc_atomic_fixed4_div_float8
    __kmpc_atomic_fixed4_div_fp
    __kmpc_atomic_fixed4_div_rev
    __kmpc_atomic_fixed4_eqv
    __kmpc_atomic_fixed4_eqv_cpt
    __kmpc_atomic_fixed4_max
    __kmpc_atomic_fixed4_max_cpt
    __kmpc_atomic_fixed4_min
    __kmpc_atomic_fixed4_min_cpt
    __kmpc_atomic_fixed4_mul
    __kmpc_atomic_fixed4_mul_cpt
    __kmpc_atomic_fixed4_mul_float8
    __kmpc_atomic_fixed4_mul_fp
    __kmpc_atomic_fixed4_neqv
    __kmpc_atomic_fixed4_neqv_cpt
    __kmpc_atomic_fixed4_orb
    __kmpc_atomic_fixed4_orb_cpt
    __kmpc_atomic_fixed4_orl
    __kmpc_atomic_fixed4_orl_cpt
    __kmpc_atomic_fixed4_rd
    __kmpc_atomic_fixed4_shl
    __kmpc_atomic_fixed4_shl_cpt
    __kmpc_atomic_fixed4_shl_cpt_rev
    __kmpc_atomic_fixed4_shl_rev
    __kmpc_atomic_fixed4_shr
    __kmpc_atomic_fixed4_shr_cpt
    __kmpc_atomic_fixed4_shr_cpt_rev
    __kmpc_atomic_fixed4_shr_rev
    __kmpc_atomic_fixed4_sub
    __kmpc_atomic_fixed4_sub_cpt
    __kmpc_atomic_fixed4_sub_cpt_rev
    __kmpc_atomic_fixed4_sub_fp
    __kmpc_atomic_fixed4_sub_rev
    __kmpc_atomic_fixed4_swp
    __kmpc_atomic_fixed4_wr
    __kmpc_atomic_fixed4_xor
    __kmpc_atomic_fixed4_xor_cpt
    __kmpc_atomic_fixed4u_add_fp
    __kmpc_atomic_fixed4u_sub_fp
    __kmpc_atomic_fixed4u_mul_fp
    __kmpc_atomic_fixed4u_div
    __kmpc_atomic_fixed4u_div_cpt
    __kmpc_atomic_fixed4u_div_cpt_rev
    __kmpc_atomic_fixed4u_div_fp
    __kmpc_atomic_fixed4u_div_rev
    __kmpc_atomic_fixed4u_shr
    __kmpc_atomic_fixed4u_shr_cpt
    __kmpc_atomic_fixed4u_shr_cpt_rev
    __kmpc_atomic_fixed4u_shr_rev
    __kmpc_atomic_fixed8_add
    __kmpc_atomic_fixed8_add_cpt
    __kmpc_atomic_fixed8_add_fp
    __kmpc_atomic_fixed8_andb
    __kmpc_atomic_fixed8_andb_cpt
    __kmpc_atomic_fixed8_andl
    __kmpc_atomic_fixed8_andl_cpt
    __kmpc_atomic_fixed8_div
    __kmpc_atomic_fixed8_div_cpt
    __kmpc_atomic_fixed8_div_cpt_rev
    __kmpc_atomic_fixed8_div_float8
    __kmpc_atomic_fixed8_div_fp
    __kmpc_atomic_fixed8_div_rev
    __kmpc_atomic_fixed8_eqv
    __kmpc_atomic_fixed8_eqv_cpt
    __kmpc_atomic_fixed8_max
    __kmpc_atomic_fixed8_max_cpt
    __kmpc_atomic_fixed8_min
    __kmpc_atomic_fixed8_min_cpt
    __kmpc_atomic_fixed8_mul
    __kmpc_atomic_fixed8_mul_cpt
    __kmpc_atomic_fixed8_mul_float8
    __kmpc_atomic_fixed8_mul_fp
    __kmpc_atomic_fixed8_neqv
    __kmpc_atomic_fixed8_neqv_cpt
    __kmpc_atomic_fixed8_orb
    __kmpc_atomic_fixed8_orb_cpt
    __kmpc_atomic_fixed8_orl
    __kmpc_atomic_fixed8_orl_cpt
    __kmpc_atomic_fixed8_rd
    __kmpc_atomic_fixed8_shl
    __kmpc_atomic_fixed8_shl_cpt
    __kmpc_atomic_fixed8_shl_cpt_rev
    __kmpc_atomic_fixed8_shl_rev
    __kmpc_atomic_fixed8_shr
    __kmpc_atomic_fixed8_shr_cpt
    __kmpc_atomic_fixed8_shr_cpt_rev
    __kmpc_atomic_fixed8_shr_rev
    __kmpc_atomic_fixed8_sub
    __kmpc_atomic_fixed8_sub_cpt
    __kmpc_atomic_fixed8_sub_cpt_rev
    __kmpc_atomic_fixed8_sub_fp
    __kmpc_atomic_fixed8_sub_rev
    __kmpc_atomic_fixed8_swp
    __kmpc_atomic_fixed8_wr
    __kmpc_atomic_fixed8_xor
    __kmpc_atomic_fixed8_xor_cpt
    __kmpc_atomic_fixed8u_add_fp
    __kmpc_atomic_fixed8u_sub_fp
    __kmpc_atomic_fixed8u_mul_fp
    __kmpc_atomic_fixed8u_div
    __kmpc_atomic_fixed8u_div_cpt
    __kmpc_atomic_fixed8u_div_cpt_rev
    __kmpc_atomic_fixed8u_div_fp
    __kmpc_atomic_fixed8u_div_rev
    __kmpc_atomic_fixed8u_shr
    __kmpc_atomic_fixed8u_shr_cpt
    __kmpc_atomic_fixed8u_shr_cpt_rev
    __kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are the 80 bit x87 extended-precision format, and are
now rare.)
@code
    __kmpc_atomic_float4_add
    __kmpc_atomic_float4_add_cpt
    __kmpc_atomic_float4_add_float8
    __kmpc_atomic_float4_add_fp
    __kmpc_atomic_float4_div
    __kmpc_atomic_float4_div_cpt
    __kmpc_atomic_float4_div_cpt_rev
    __kmpc_atomic_float4_div_float8
    __kmpc_atomic_float4_div_fp
    __kmpc_atomic_float4_div_rev
    __kmpc_atomic_float4_max
    __kmpc_atomic_float4_max_cpt
    __kmpc_atomic_float4_min
    __kmpc_atomic_float4_min_cpt
    __kmpc_atomic_float4_mul
    __kmpc_atomic_float4_mul_cpt
    __kmpc_atomic_float4_mul_float8
    __kmpc_atomic_float4_mul_fp
    __kmpc_atomic_float4_rd
    __kmpc_atomic_float4_sub
    __kmpc_atomic_float4_sub_cpt
    __kmpc_atomic_float4_sub_cpt_rev
    __kmpc_atomic_float4_sub_float8
    __kmpc_atomic_float4_sub_fp
    __kmpc_atomic_float4_sub_rev
    __kmpc_atomic_float4_swp
    __kmpc_atomic_float4_wr
    __kmpc_atomic_float8_add
    __kmpc_atomic_float8_add_cpt
    __kmpc_atomic_float8_add_fp
    __kmpc_atomic_float8_div
    __kmpc_atomic_float8_div_cpt
    __kmpc_atomic_float8_div_cpt_rev
    __kmpc_atomic_float8_div_fp
    __kmpc_atomic_float8_div_rev
    __kmpc_atomic_float8_max
    __kmpc_atomic_float8_max_cpt
    __kmpc_atomic_float8_min
    __kmpc_atomic_float8_min_cpt
    __kmpc_atomic_float8_mul
    __kmpc_atomic_float8_mul_cpt
    __kmpc_atomic_float8_mul_fp
    __kmpc_atomic_float8_rd
    __kmpc_atomic_float8_sub
    __kmpc_atomic_float8_sub_cpt
    __kmpc_atomic_float8_sub_cpt_rev
    __kmpc_atomic_float8_sub_fp
    __kmpc_atomic_float8_sub_rev
    __kmpc_atomic_float8_swp
    __kmpc_atomic_float8_wr
    __kmpc_atomic_float10_add
    __kmpc_atomic_float10_add_cpt
    __kmpc_atomic_float10_add_fp
    __kmpc_atomic_float10_div
    __kmpc_atomic_float10_div_cpt
    __kmpc_atomic_float10_div_cpt_rev
    __kmpc_atomic_float10_div_fp
    __kmpc_atomic_float10_div_rev
    __kmpc_atomic_float10_mul
    __kmpc_atomic_float10_mul_cpt
    __kmpc_atomic_float10_mul_fp
    __kmpc_atomic_float10_rd
    __kmpc_atomic_float10_sub
    __kmpc_atomic_float10_sub_cpt
    __kmpc_atomic_float10_sub_cpt_rev
    __kmpc_atomic_float10_sub_fp
    __kmpc_atomic_float10_sub_rev
    __kmpc_atomic_float10_swp
    __kmpc_atomic_float10_wr
    __kmpc_atomic_float16_add
    __kmpc_atomic_float16_add_cpt
    __kmpc_atomic_float16_div
    __kmpc_atomic_float16_div_cpt
    __kmpc_atomic_float16_div_cpt_rev
    __kmpc_atomic_float16_div_rev
    __kmpc_atomic_float16_max
    __kmpc_atomic_float16_max_cpt
    __kmpc_atomic_float16_min
    __kmpc_atomic_float16_min_cpt
    __kmpc_atomic_float16_mul
    __kmpc_atomic_float16_mul_cpt
    __kmpc_atomic_float16_rd
    __kmpc_atomic_float16_sub
    __kmpc_atomic_float16_sub_cpt
    __kmpc_atomic_float16_sub_cpt_rev
    __kmpc_atomic_float16_sub_rev
    __kmpc_atomic_float16_swp
    __kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.

@code
    __kmpc_atomic_cmplx4_add
    __kmpc_atomic_cmplx4_add_cmplx8
    __kmpc_atomic_cmplx4_add_cpt
    __kmpc_atomic_cmplx4_div
    __kmpc_atomic_cmplx4_div_cmplx8
    __kmpc_atomic_cmplx4_div_cpt
    __kmpc_atomic_cmplx4_div_cpt_rev
    __kmpc_atomic_cmplx4_div_rev
    __kmpc_atomic_cmplx4_mul
    __kmpc_atomic_cmplx4_mul_cmplx8
    __kmpc_atomic_cmplx4_mul_cpt
    __kmpc_atomic_cmplx4_rd
    __kmpc_atomic_cmplx4_sub
    __kmpc_atomic_cmplx4_sub_cmplx8
    __kmpc_atomic_cmplx4_sub_cpt
    __kmpc_atomic_cmplx4_sub_cpt_rev
    __kmpc_atomic_cmplx4_sub_rev
    __kmpc_atomic_cmplx4_swp
    __kmpc_atomic_cmplx4_wr
    __kmpc_atomic_cmplx8_add
    __kmpc_atomic_cmplx8_add_cpt
    __kmpc_atomic_cmplx8_div
    __kmpc_atomic_cmplx8_div_cpt
    __kmpc_atomic_cmplx8_div_cpt_rev
    __kmpc_atomic_cmplx8_div_rev
    __kmpc_atomic_cmplx8_mul
    __kmpc_atomic_cmplx8_mul_cpt
    __kmpc_atomic_cmplx8_rd
    __kmpc_atomic_cmplx8_sub
    __kmpc_atomic_cmplx8_sub_cpt
    __kmpc_atomic_cmplx8_sub_cpt_rev
    __kmpc_atomic_cmplx8_sub_rev
    __kmpc_atomic_cmplx8_swp
    __kmpc_atomic_cmplx8_wr
    __kmpc_atomic_cmplx10_add
    __kmpc_atomic_cmplx10_add_cpt
    __kmpc_atomic_cmplx10_div
    __kmpc_atomic_cmplx10_div_cpt
    __kmpc_atomic_cmplx10_div_cpt_rev
    __kmpc_atomic_cmplx10_div_rev
    __kmpc_atomic_cmplx10_mul
    __kmpc_atomic_cmplx10_mul_cpt
    __kmpc_atomic_cmplx10_rd
    __kmpc_atomic_cmplx10_sub
    __kmpc_atomic_cmplx10_sub_cpt
    __kmpc_atomic_cmplx10_sub_cpt_rev
    __kmpc_atomic_cmplx10_sub_rev
    __kmpc_atomic_cmplx10_swp
    __kmpc_atomic_cmplx10_wr
    __kmpc_atomic_cmplx16_add
    __kmpc_atomic_cmplx16_add_cpt
    __kmpc_atomic_cmplx16_div
    __kmpc_atomic_cmplx16_div_cpt
    __kmpc_atomic_cmplx16_div_cpt_rev
    __kmpc_atomic_cmplx16_div_rev
    __kmpc_atomic_cmplx16_mul
    __kmpc_atomic_cmplx16_mul_cpt
    __kmpc_atomic_cmplx16_rd
    __kmpc_atomic_cmplx16_sub
    __kmpc_atomic_cmplx16_sub_cpt
    __kmpc_atomic_cmplx16_sub_cpt_rev
    __kmpc_atomic_cmplx16_swp
    __kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                              \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs) {            \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

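// For example, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens the definition
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) { ...
// Note that the closing brace is not part of this macro; it is supplied by
// whichever macro provides the routine's body.
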
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here as it should always be valid.
// For 1- and 2-byte operands a valid gtid is expected; other sizes check it
// before this macro is used.
#define OP_CRITICAL(OP, LCK_ID)                                                \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) OP(rhs);                                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (TYPE)((*lhs)OP rhs);                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

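// As an illustration, OP_UPDATE_CRITICAL(kmp_int32, +, 4i) expands to an
// update of *lhs guarded by the 4-byte-integer lock:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) = (kmp_int32)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
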
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section.  On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1.  If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                             \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(OP, 0);                                                        \
    return;                                                                    \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
    return;                                                                    \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    TYPE old_value, new_value;                                                 \
    old_value = *(TYPE volatile *)lhs;                                         \
    new_value = (TYPE)(old_value OP rhs);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      old_value = *(TYPE volatile *)lhs;                                       \
      new_value = (TYPE)(old_value OP rhs);                                    \
    }                                                                          \
  }

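// The macro above is the classic compare-and-swap retry loop: read *lhs,
// compute the new value, and attempt the swap; if another thread changed
// *lhs in between, reread and recompute until the swap succeeds.
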
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
    }                                                                          \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP)                                             \
  {                                                                            \
    struct _sss {                                                              \
      TYPE cmp;                                                                \
      kmp_int##BITS *vvv;                                                      \
    };                                                                         \
    struct _sss old_value, new_value;                                          \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
    new_value.cmp = old_value.cmp OP rhs;                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
      new_value.cmp = old_value.cmp OP rhs;                                    \
    }                                                                          \
  }

#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  (*lhs) = (*lhs)OP rhs;                                                       \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                       GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
                                  MASK, GOMP_FLAG)                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

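// For example, the fixed4 add entry above generates
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs);
// whose body, on x86/x86_64, reduces to a single KMP_TEST_THEN_ADD32
// fetch-and-add after the GOMP-compatibility check.
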
// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
//     MASK    - used for alignment check

//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                         */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
  }

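// Note how the missing combined assignment is synthesized: passing
// "= *lhs &&" as the OP argument makes OP_CRITICAL's "(*lhs) OP (rhs)"
// expand to "(*lhs) = *lhs && (rhs)".
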
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no counterpart in C:             */
/* MAX, MIN, .EQV., .NEQV.                                                   */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
//     OP - operator used to check whether an update is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    *lhs = rhs;                                                                \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT(OP, 0);                                                   \
    return;                                                                    \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value;                                                            \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
  }

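// Note the loop's two exit conditions: it stops either because the comparison
// "old_value OP rhs" fails (another thread has already installed a value at
// least as good, so no store is needed) or because the compare-and-store
// succeeds.
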
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
  }                                                                            \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
  }                                                                            \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                         GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
    } else {                                                                   \
      KMP_CHECK_GTID;                                                          \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
    }                                                                          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223                  0) // __kmpc_atomic_fixed4_max
1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225                  0) // __kmpc_atomic_fixed4_min
1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1239                  1) // __kmpc_atomic_float10_max
1240 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1241                  1) // __kmpc_atomic_float10_min
1242 #if KMP_HAVE_QUAD
1243 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1244                  1) // __kmpc_atomic_float16_max
1245 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1246                  1) // __kmpc_atomic_float16_min
1247 #if (KMP_ARCH_X86)
1248 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1249                  1) // __kmpc_atomic_float16_max_a16
1250 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1251                  1) // __kmpc_atomic_float16_min_a16
1252 #endif // (KMP_ARCH_X86)
1253 #endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// .EQV. needs separate macros because the operand has to be complemented (~).
// OP is ignored for critical sections; ^=~ is used instead.
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
  }
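// For illustration: .EQV. relies on the bitwise identity
//   a .eqv. b  ==  ~(a ^ b)  ==  a ^ ~b
// so the critical path above sends the assignment "*lhs ^= (TYPE)~rhs".
// A quick check with concrete bits:
//   kmp_int8 a = 0x0F, b = 0x33;
//   kmp_int8 r = (kmp_int8)(a ^ ~b); // 0xC3: set where bits of a and b agree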

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                        GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                        GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
  }
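// For illustration only, a rough sketch of what an instantiation expands to:
// ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) yields approximately
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     // (GOMP compatibility path omitted)
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     (*lhs) = (long double)((*lhs) + rhs); // the guarded update
//     __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   }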

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_REV(TYPE, OP, 0);                                              \
    return;                                                                    \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
                                                   TYPE *lhs, TYPE rhs) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = (TYPE)(rhs OP old_value);                                      \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_DO_PAUSE;                                                            \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = (TYPE)(rhs OP old_value);                                    \
    }                                                                          \
  }
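// For illustration: the reversed form computes "rhs OP old_value" instead of
// "old_value OP rhs", so ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, ..)
// makes __kmpc_atomic_fixed4_sub_rev perform, atomically,
//   *lhs = rhs - *lhs; // note the reversed operand order
// which cannot be obtained from the forward __kmpc_atomic_fixed4_sub.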

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }

// ------------------------------------------------------------------------
// Entry definitions for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable

// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE,  BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
//                  TYPE_ID, OP_ID, TYPE,  BITS, OP, LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".           */
/* Note: to reduce the total number of type combinations, it is assumed     */
/*       that the compiler converts RHS to the longest floating type,       */
/*       that is _Quad, before calling any of these routines.               */
/* The conversion to _Quad is done by the compiler during the calculation,  */
/*    and the conversion back to TYPE - before the assignment, like:        */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of software emulation.         */
/* ------------------------------------------------------------------------ */
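// For illustration (a hypothetical lowering, following the convention above):
// a mixed-type update such as
//   char c; double d;
//   c = (char)((_Quad)c * (_Quad)d); // what an atomic "c *= d" must compute
// would be emitted by the compiler as a call like
//   __kmpc_atomic_fixed1_mul_fp(id_ref, gtid, &c, (_Quad)d);
// with the conversion back to char performed inside the routine, exactly as
// the assignment sketched in the comment block above.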

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
                           GOMP_FLAG)                                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
                           LCK_ID, MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
                               LCK_ID, GOMP_FLAG)                              \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed once we are sure the compiler does
// not use these routines)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  OP_CMPXCHG(TYPE, BITS, OP)                                                   \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
                             LCK_ID, MASK, GOMP_FLAG)                          \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
  } else {                                                                     \
    KMP_CHECK_GTID;                                                            \
    OP_UPDATE_CRITICAL(TYPE, OP,                                               \
                       LCK_ID) /* unaligned address - use critical */          \
  }                                                                            \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *loc) {                      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    union f_i_union {                                                          \
      TYPE f_val;                                                              \
      kmp_int##BITS i_val;                                                     \
    };                                                                         \
    union f_i_union old_value;                                                 \
    temp_val = *loc;                                                           \
    old_value.f_val = temp_val;                                                \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
        (kmp_int##BITS *)loc,                                                  \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
    new_value = old_value.f_val;                                               \
    return new_value;                                                          \
  }
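// For illustration: the read works by trying to replace the value with
// itself - KMP_COMPARE_AND_STORE_RET##BITS returns whatever value it found in
// memory whether or not the store happens, and that returned value is the
// atomic snapshot. The union exists only to move the bits between TYPE and
// kmp_int##BITS without a numeric conversion, e.g.
//   union f_i_union u;
//   u.f_val = 1.0f;           // kmp_real32 payload
//   kmp_int32 bits = u.i_val; // the same 32 bits, reinterpreted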

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  new_value = (*loc);                                                          \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_READ(OP, 0);                                                   \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
  return new_value;                                                            \
  }
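// For illustration: ATOMIC_FIXED_READ reads an integer atomically by doing a
// locked fetch-and-add of zero; e.g. for __kmpc_atomic_fixed8_rd the core is
//   new_value = KMP_TEST_THEN_ADD64(loc, +0); // adds 0, returns the old value
// so the value returned by the locked add is the atomic snapshot of *loc.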
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
  OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
  return new_value;                                                            \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional out parameter instead.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  (*out) = (*loc);                                                             \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_READ_WRK(OP, 0);                                               \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) {                          \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
  }

#endif // KMP_OS_WINDOWS
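// For illustration (caller-side sketch; id_ref and gtid assumed in scope):
// under the workaround the cmplx4 read has a different calling convention -
// the value comes back through the leading out parameter:
//   kmp_cmplx32 result;
//   __kmpc_atomic_cmplx4_rd(&result, id_ref, gtid, &x); // Windows* OS form
// versus the regular return-value form used on other platforms:
//   kmp_cmplx32 result = __kmpc_atomic_cmplx4_rd(id_ref, gtid, &x);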

// ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE,  BITS, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd

// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
  KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
  }
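// For illustration: an atomic write is an unconditional exchange; the old
// value returned by the primitive is simply discarded. So for an
// instantiation like ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, ...) the
// core of __kmpc_atomic_fixed4_wr is roughly
//   (void)KMP_XCHG_FIXED32(lhs, rhs); // swap rhs in, ignore what was there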
2108 
2109 // ------------------------------------------------------------------------
2110 // Operation on *lhs, rhs using "compare_and_store" routine
2111 //     TYPE    - operands' type
2112 //     BITS    - size in bits, used to distinguish low level calls
2113 //     OP      - operator
2114 // Note: temp_val introduced in order to force the compiler to read
2115 //       *lhs only once (w/o it the compiler reads *lhs twice)
2116 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2117   {                                                                            \
2118     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2119     TYPE old_value, new_value;                                                 \
2120     temp_val = *lhs;                                                           \
2121     old_value = temp_val;                                                      \
2122     new_value = rhs;                                                           \
2123     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2124         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2125         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2126       temp_val = *lhs;                                                         \
2127       old_value = temp_val;                                                    \
2128       new_value = rhs;                                                         \
2129     }                                                                          \
2130   }
2131 
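// Illustrative trace of the loop above for OP_CMPXCHG_WR(kmp_real64, 64, =),
// using the macro's own names (sketch only):
//
//   old_value = *lhs;        // snapshot, read exactly once via temp_val
//   new_value = rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64(
//       (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//       *VOLATILE_CAST(kmp_int64 *) & new_value)) {
//     // another thread changed *lhs between the read and the CAS:
//     old_value = *lhs;      // take a fresh snapshot and retry
//     new_value = rhs;
//   }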
2132 // -------------------------------------------------------------------------
2133 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2134   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2135   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2136   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2137   }
2138 
2139 // ------------------------------------------------------------------------
2140 // Routines for Extended types: long double, _Quad, complex flavours (use
2141 // critical section)
2142 //     TYPE_ID, OP_ID, TYPE - detailed above
2143 //     OP      - operator
2144 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2145 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2146   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2147   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2148   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2149   }
2150 // -------------------------------------------------------------------------
2151 
2152 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2153                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2154 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2155                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2156 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2157                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2158 #if (KMP_ARCH_X86)
2159 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2160                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2161 #else
2162 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2163                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2164 #endif // (KMP_ARCH_X86)
2165 
2166 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2167                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2168 #if (KMP_ARCH_X86)
2169 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2170                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2171 #else
2172 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2173                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2174 #endif // (KMP_ARCH_X86)
2175 
2176 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2177                    1) // __kmpc_atomic_float10_wr
2178 #if KMP_HAVE_QUAD
2179 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2180                    1) // __kmpc_atomic_float16_wr
2181 #endif // KMP_HAVE_QUAD
2182 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2183 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2184                    1) // __kmpc_atomic_cmplx8_wr
2185 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2186                    1) // __kmpc_atomic_cmplx10_wr
2187 #if KMP_HAVE_QUAD
2188 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2189                    1) // __kmpc_atomic_cmplx16_wr
2190 #if (KMP_ARCH_X86)
2191 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2192                    1) // __kmpc_atomic_float16_a16_wr
2193 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2194                    1) // __kmpc_atomic_cmplx16_a16_wr
2195 #endif // (KMP_ARCH_X86)
2196 #endif // KMP_HAVE_QUAD
2197 
2198 // ------------------------------------------------------------------------
2199 // Atomic CAPTURE routines
2200 
2201 // Beginning of a definition (provides name, parameters, debug trace)
2202 //     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
2203 //     unsigned fixed)
2204 //     OP_ID   - operation identifier (add, sub, mul, ...)
2205 //     TYPE    - operands' type
2206 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2207   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2208                                              TYPE *lhs, TYPE rhs, int flag) {  \
2209     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2210     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2211 
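// As a concrete instance (sketch): ATOMIC_BEGIN_CPT(fixed4, add_cpt,
// kmp_int32, kmp_int32) opens
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add_cpt: T#%d\n", gtid));
//
// and the body plus the closing brace come from the macros that use it below.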
2212 // -------------------------------------------------------------------------
2213 // Operation on *lhs, rhs bound by critical section
2214 //     OP     - operator (it's supposed to contain an assignment)
2215 //     LCK_ID - lock identifier
2216 // Note: don't check gtid as it should always be valid
2217 // 1-, 2-byte: a valid parameter is expected; others: check before this macro
2218 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2219   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2220                                                                                \
2221   if (flag) {                                                                  \
2222     (*lhs) OP rhs;                                                             \
2223     new_value = (*lhs);                                                        \
2224   } else {                                                                     \
2225     new_value = (*lhs);                                                        \
2226     (*lhs) OP rhs;                                                             \
2227   }                                                                            \
2228                                                                                \
2229   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2230   return new_value;
2231 
2232 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
2233   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2234                                                                                \
2235   if (flag) {                                                                  \
2236     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2237     new_value = (*lhs);                                                        \
2238   } else {                                                                     \
2239     new_value = (*lhs);                                                        \
2240     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2241   }                                                                            \
2242                                                                                \
2243   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2244   return new_value;
2245 
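// Worked example of the flag semantics in the two macros above, with
// *lhs == 10, rhs == 5 and OP '+=' (or OP '+' for the _UPDATE_ variant):
//   flag != 0:  *lhs: 10 -> 15, returns 15 (value captured *after* the update)
//   flag == 0:  *lhs: 10 -> 15, returns 10 (value captured *before* the update)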
2246 // ------------------------------------------------------------------------
2247 #ifdef KMP_GOMP_COMPAT
2248 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
2249   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2250     KMP_CHECK_GTID;                                                            \
2251     OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
2252   }
2253 #else
2254 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2255 #endif /* KMP_GOMP_COMPAT */
2256 
2257 // ------------------------------------------------------------------------
2258 // Operation on *lhs, rhs using "compare_and_store" routine
2259 //     TYPE    - operands' type
2260 //     BITS    - size in bits, used to distinguish low level calls
2261 //     OP      - operator
2262 // Note: temp_val introduced in order to force the compiler to read
2263 //       *lhs only once (w/o it the compiler reads *lhs twice)
2264 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2265   {                                                                            \
2266     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2267     TYPE old_value, new_value;                                                 \
2268     temp_val = *lhs;                                                           \
2269     old_value = temp_val;                                                      \
2270     new_value = (TYPE)(old_value OP rhs);                                      \
2271     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2272         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2273         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2274       temp_val = *lhs;                                                         \
2275       old_value = temp_val;                                                    \
2276       new_value = (TYPE)(old_value OP rhs);                                    \
2277     }                                                                          \
2278     if (flag) {                                                                \
2279       return new_value;                                                        \
2280     } else                                                                     \
2281       return old_value;                                                        \
2282   }
2283 
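// Sketch of the path OP_CMPXCHG_CPT(kmp_real64, 64, +) takes inside, e.g.,
// __kmpc_atomic_float8_add_cpt (same names as above; illustrative only):
//
//   kmp_real64 old_value = *lhs;              // read once via temp_val
//   kmp_real64 new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ64(
//       (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
//       *VOLATILE_CAST(kmp_int64 *) & new_value)) {
//     old_value = *lhs;                       // lost the race: re-read,
//     new_value = old_value + rhs;            // recompute, try again
//   }
//   return flag ? new_value : old_value;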
2284 // -------------------------------------------------------------------------
2285 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2286   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2287   TYPE new_value;                                                              \
2288   (void)new_value;                                                             \
2289   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2290   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2291   }
2292 
2293 // -------------------------------------------------------------------------
2294 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2295   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2296   TYPE old_value, new_value;                                                   \
2297   (void)new_value;                                                             \
2298   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2299   /* OP supplies the sign: subtraction (lhs-rhs) becomes (lhs + -rhs) */      \
2300   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2301   if (flag) {                                                                  \
2302     return old_value OP rhs;                                                   \
2303   } else                                                                       \
2304     return old_value;                                                          \
2305   }
2306 // -------------------------------------------------------------------------
2307 
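// Worked example for the macro above: __kmpc_atomic_fixed4_sub_cpt passes
// OP '-', so the primitive call is KMP_TEST_THEN_ADD32(lhs, -rhs), which
// returns the pre-update value. With *lhs == 10, rhs == 3:
//   old_value == 10 and *lhs becomes 7;
//   flag != 0 -> returns old_value - rhs == 7 (value after the update)
//   flag == 0 -> returns old_value == 10 (value before the update)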
2308 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2309                      0) // __kmpc_atomic_fixed4_add_cpt
2310 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2311                      0) // __kmpc_atomic_fixed4_sub_cpt
2312 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2313                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2314 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2315                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2316 
2317 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2318                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2319 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2320                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2321 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2322                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2323 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2324                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2325 
2326 // ------------------------------------------------------------------------
2327 // Entry definitions for integer operands
2328 //     TYPE_ID - operand type and size (fixed4, float4)
2329 //     OP_ID   - operation identifier (add, sub, mul, ...)
2330 //     TYPE    - operand type
2331 //     BITS    - size in bits, used to distinguish low level calls
2332 //     OP      - operator (used in critical section)
2333 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2334 // ------------------------------------------------------------------------
2335 // Routines for ATOMIC integer operands, other operators
2336 // ------------------------------------------------------------------------
2337 //              TYPE_ID,OP_ID, TYPE, BITS, OP,  GOMP_FLAG
2338 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2339                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2341                    0) // __kmpc_atomic_fixed1_andb_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2343                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2345                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2347                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2349                    0) // __kmpc_atomic_fixed1_orb_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2351                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2353                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2355                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2357                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2359                    0) // __kmpc_atomic_fixed1_xor_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2361                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2363                    0) // __kmpc_atomic_fixed2_andb_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2365                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2367                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2369                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2371                    0) // __kmpc_atomic_fixed2_orb_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2373                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2375                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2377                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2379                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2381                    0) // __kmpc_atomic_fixed2_xor_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2383                    0) // __kmpc_atomic_fixed4_andb_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2385                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2386 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2387                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2388 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2389                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2390 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2391                    0) // __kmpc_atomic_fixed4_orb_cpt
2392 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2393                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2394 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2395                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2396 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2397                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2398 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2399                    0) // __kmpc_atomic_fixed4_xor_cpt
2400 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2401                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2402 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2403                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2404 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2405                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2406 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2407                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2408 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2409                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2410 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2411                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2412 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2413                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2414 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2415                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2416 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2417                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2418 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2419                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2420 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2421                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2422 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2423                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2424 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2425                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2426 //              TYPE_ID,OP_ID, TYPE, BITS, OP,  GOMP_FLAG
2427 
2428 // CAPTURE routines for mixed types: RHS = float16
2429 #if KMP_HAVE_QUAD
2430 
2431 // Beginning of a definition (provides name, parameters, debug trace)
2432 //     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
2433 //     unsigned fixed)
2434 //     OP_ID   - operation identifier (add, sub, mul, ...)
2435 //     TYPE    - operands' type
2436 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2437   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2438       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2439     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2440     KA_TRACE(100,                                                              \
2441              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2442               gtid));
2443 
2444 // -------------------------------------------------------------------------
2445 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2446                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2447   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2448   TYPE new_value;                                                              \
2449   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2450   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2451   }
2452 
2453 // -------------------------------------------------------------------------
2454 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2455                                 LCK_ID, GOMP_FLAG)                             \
2456   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2457   TYPE new_value;                                                              \
2458   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2459   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2460   }
2461 
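// Shape of the mixed entries generated below (sketch): for example,
// ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
// KMP_ARCH_X86) yields
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
//                                        _Quad rhs, int flag);
//
// i.e. the target keeps its own type while rhs arrives as _Quad and is
// narrowed by the (TYPE) cast in the update expression.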
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2463                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2465                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2467                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2469                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2471                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2473                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2475                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2477                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2478 
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2480                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2482                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2484                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2486                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2488                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2490                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2492                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2494                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2495 
2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2497                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2499                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2501                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2503                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2505                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2507                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2508 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2509                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2510 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2511                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2512 
2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2514                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2516                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2518                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2520                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2522                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2524                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2525 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2526                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2527 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2528                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2529 
2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2531                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2533                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2534 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2535                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2536 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2537                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2538 
2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2540                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2542                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2543 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2544                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2545 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2546                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2547 
2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2549                         1) // __kmpc_atomic_float10_add_cpt_fp
2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2551                         1) // __kmpc_atomic_float10_sub_cpt_fp
2552 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2553                         1) // __kmpc_atomic_float10_mul_cpt_fp
2554 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2555                         1) // __kmpc_atomic_float10_div_cpt_fp
2556 
2557 #endif // KMP_HAVE_QUAD
2558 
2559 // ------------------------------------------------------------------------
2560 // Routines for C/C++ Reduction operators && and ||
2561 
2562 // -------------------------------------------------------------------------
2563 // Operation on *lhs, rhs bound by critical section
2564 //     OP     - operator (it's supposed to contain an assignment)
2565 //     LCK_ID - lock identifier
2566 // Note: don't check gtid as it should always be valid
2567 // 1-, 2-byte: a valid parameter is expected; others: check before this macro
2568 #define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2569   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2570                                                                                \
2571   if (flag) {                                                                  \
2572     new_value OP rhs;                                                          \
2573     (*lhs) = new_value;                                                        \
2574   } else {                                                                     \
2575     new_value = (*lhs);                                                        \
2576     (*lhs) OP rhs;                                                             \
2577   }                                                                            \
2578                                                                                \
2579   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2580 
2581 // ------------------------------------------------------------------------
2582 #ifdef KMP_GOMP_COMPAT
2583 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2584   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2585     KMP_CHECK_GTID;                                                            \
2586     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2587     return new_value;                                                          \
2588   }
2589 #else
2590 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2591 #endif /* KMP_GOMP_COMPAT */
2592 
2593 // ------------------------------------------------------------------------
2594 // Need separate macros for &&, || because there is no combined assignment
2595 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2596   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2597   TYPE new_value;                                                              \
2598   (void)new_value;                                                             \
2599   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2600   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2601   }
2602 
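// Example of the pasted operand above: for fixed4 andl_cpt the GOMP critical
// path receives OP as '= *lhs &&', so (in the flag != 0 branch) it executes
//
//   new_value = *lhs && rhs; // no '&&=' exists, hence the leading '='
//   (*lhs) = new_value;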
2603 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2604                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2605 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2606                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2607 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2608                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2609 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2610                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2611 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2612                   0) // __kmpc_atomic_fixed4_andl_cpt
2613 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2614                   0) // __kmpc_atomic_fixed4_orl_cpt
2615 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2616                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2617 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2618                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2619 
2620 // -------------------------------------------------------------------------
2621 // Routines for Fortran operators that have no C counterpart:
2622 // MAX, MIN, .EQV., .NEQV.
2623 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2624 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2625 
2626 // -------------------------------------------------------------------------
2627 // MIN and MAX need separate macros
2628 // OP - comparison operator used to decide whether any action is needed
2629 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2630   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2631                                                                                \
2632   if (*lhs OP rhs) { /* still need actions? */                                 \
2633     old_value = *lhs;                                                          \
2634     *lhs = rhs;                                                                \
2635     if (flag)                                                                  \
2636       new_value = rhs;                                                         \
2637     else                                                                       \
2638       new_value = old_value;                                                   \
2639   } else {                                                                     \
2640     new_value = *lhs;                                                          \
2641   }                                                                            \
2642   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2643   return new_value;
2644 
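// Worked example for max_cpt (OP is '<'): with *lhs == 3, rhs == 7 the test
// '*lhs OP rhs' holds, so *lhs becomes 7; the routine returns 7 when
// flag != 0 and the old 3 when flag == 0. With rhs == 2 nothing is stored
// and the current *lhs is returned unchanged.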
2645 // -------------------------------------------------------------------------
2646 #ifdef KMP_GOMP_COMPAT
2647 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2648   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2649     KMP_CHECK_GTID;                                                            \
2650     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2651   }
2652 #else
2653 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2654 #endif /* KMP_GOMP_COMPAT */
2655 
2656 // -------------------------------------------------------------------------
2657 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2658   {                                                                            \
2659     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2660     /*TYPE old_value; */                                                       \
2661     temp_val = *lhs;                                                           \
2662     old_value = temp_val;                                                      \
2663     while (old_value OP rhs && /* still need actions? */                       \
2664            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2665                (kmp_int##BITS *)lhs,                                           \
2666                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2667                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2668       temp_val = *lhs;                                                         \
2669       old_value = temp_val;                                                    \
2670     }                                                                          \
2671     if (flag)                                                                  \
2672       return rhs;                                                              \
2673     else                                                                       \
2674       return old_value;                                                        \
2675   }
2676 
2677 // -------------------------------------------------------------------------
2678 // 1-byte, 2-byte operands - use critical section
2679 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2680   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2681   TYPE new_value, old_value;                                                   \
2682   if (*lhs OP rhs) { /* need actions? */                                       \
2683     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2684     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2685   }                                                                            \
2686   return *lhs;                                                                 \
2687   }
2688 
2689 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2690   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2691   TYPE new_value, old_value;                                                   \
2692   (void)new_value;                                                             \
2693   if (*lhs OP rhs) {                                                           \
2694     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2695     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2696   }                                                                            \
2697   return *lhs;                                                                 \
2698   }
2699 
2700 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2701                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2702 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2703                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2704 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2705                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2706 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2707                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2708 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2709                      0) // __kmpc_atomic_fixed4_max_cpt
2710 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2711                      0) // __kmpc_atomic_fixed4_min_cpt
2712 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2713                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2714 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2715                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2716 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2717                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2718 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2719                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2720 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2721                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2722 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2723                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2724 MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2725                      1) // __kmpc_atomic_float10_max_cpt
2726 MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2727                      1) // __kmpc_atomic_float10_min_cpt
2728 #if KMP_HAVE_QUAD
2729 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2730                      1) // __kmpc_atomic_float16_max_cpt
2731 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2732                      1) // __kmpc_atomic_float16_min_cpt
2733 #if (KMP_ARCH_X86)
2734 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2735                      1) // __kmpc_atomic_float16_max_a16_cpt
2736 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2737                      1) // __kmpc_atomic_float16_min_a16_cpt
2738 #endif // (KMP_ARCH_X86)
2739 #endif // KMP_HAVE_QUAD
2740 
2741 // ------------------------------------------------------------------------
2742 #ifdef KMP_GOMP_COMPAT
2743 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2744   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2745     KMP_CHECK_GTID;                                                            \
2746     OP_CRITICAL_CPT(OP, 0);                                                    \
2747   }
2748 #else
2749 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2750 #endif /* KMP_GOMP_COMPAT */
2751 // ------------------------------------------------------------------------
2752 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2753   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2754   TYPE new_value;                                                              \
2755   (void)new_value;                                                             \
2756   OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
2757   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2758   }
2759 
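// The .EQV. operator (bitwise XNOR) has no direct C counterpart, so the
// entries below hand the CAS path '^~' (old_value ^ ~rhs) and the critical
// fallback '^= (TYPE) ~'. Worked example on kmp_int8:
//   0x0F .EQV. 0x3C  ==  0x0F ^ ~0x3C  ==  (kmp_int8)0xCC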
2760 // ------------------------------------------------------------------------
2761 
2762 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2763                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2764 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2765                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2766 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2767                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2768 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2769                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2770 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2771                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2772 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2773                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2774 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2775                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2776 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2777                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2778 
2779 // ------------------------------------------------------------------------
2780 // Routines for Extended types: long double, _Quad, complex flavours (use
2781 // critical section)
2782 //     TYPE_ID, OP_ID, TYPE - detailed above
2783 //     OP      - operator
2784 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2785 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2786   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2787   TYPE new_value;                                                              \
2788   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2789   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2790   }
2791 
2792 // ------------------------------------------------------------------------
2793 // Workaround for cmplx4: regular routines with a return value don't work
2794 // on Win_32e, so the captured value is returned through an extra parameter.
2795 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2796   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2797                                                                                \
2798   if (flag) {                                                                  \
2799     (*lhs) OP rhs;                                                             \
2800     (*out) = (*lhs);                                                           \
2801   } else {                                                                     \
2802     (*out) = (*lhs);                                                           \
2803     (*lhs) OP rhs;                                                             \
2804   }                                                                            \
2805                                                                                \
2806   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2807   return;
2808 // ------------------------------------------------------------------------
2809 
2810 #ifdef KMP_GOMP_COMPAT
2811 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2812   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2813     KMP_CHECK_GTID;                                                            \
2814     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2815   }
2816 #else
2817 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2818 #endif /* KMP_GOMP_COMPAT */
2819 // ------------------------------------------------------------------------
2820 
2821 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2822   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2823                                          TYPE rhs, TYPE *out, int flag) {      \
2824     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2825     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2826 // ------------------------------------------------------------------------
2827 
2828 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2829   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2830   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2831   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2832   }
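// Shape of the cmplx4 entries this produces (sketch, from ATOMIC_BEGIN_WRK):
// the captured value travels through *out, so the routine can stay void:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);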
2833 // The end of workaround for cmplx4
2834 
2835 /* ------------------------------------------------------------------------- */
2836 // routines for long double type
2837 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2838                     1) // __kmpc_atomic_float10_add_cpt
2839 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2840                     1) // __kmpc_atomic_float10_sub_cpt
2841 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2842                     1) // __kmpc_atomic_float10_mul_cpt
2843 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2844                     1) // __kmpc_atomic_float10_div_cpt
2845 #if KMP_HAVE_QUAD
2846 // routines for _Quad type
2847 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2848                     1) // __kmpc_atomic_float16_add_cpt
2849 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2850                     1) // __kmpc_atomic_float16_sub_cpt
2851 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2852                     1) // __kmpc_atomic_float16_mul_cpt
2853 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2854                     1) // __kmpc_atomic_float16_div_cpt
2855 #if (KMP_ARCH_X86)
2856 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2857                     1) // __kmpc_atomic_float16_add_a16_cpt
2858 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2859                     1) // __kmpc_atomic_float16_sub_a16_cpt
2860 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2861                     1) // __kmpc_atomic_float16_mul_a16_cpt
2862 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2863                     1) // __kmpc_atomic_float16_div_a16_cpt
2864 #endif // (KMP_ARCH_X86)
2865 #endif // KMP_HAVE_QUAD
2866 
2867 // routines for complex types
2868 
2869 // cmplx4 routines to return void
2870 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2871                         1) // __kmpc_atomic_cmplx4_add_cpt
2872 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2873                         1) // __kmpc_atomic_cmplx4_sub_cpt
2874 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2875                         1) // __kmpc_atomic_cmplx4_mul_cpt
2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2877                         1) // __kmpc_atomic_cmplx4_div_cpt
2878 
2879 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2880                     1) // __kmpc_atomic_cmplx8_add_cpt
2881 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2882                     1) // __kmpc_atomic_cmplx8_sub_cpt
2883 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2884                     1) // __kmpc_atomic_cmplx8_mul_cpt
2885 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2886                     1) // __kmpc_atomic_cmplx8_div_cpt
2887 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2888                     1) // __kmpc_atomic_cmplx10_add_cpt
2889 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2890                     1) // __kmpc_atomic_cmplx10_sub_cpt
2891 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2892                     1) // __kmpc_atomic_cmplx10_mul_cpt
2893 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2894                     1) // __kmpc_atomic_cmplx10_div_cpt
2895 #if KMP_HAVE_QUAD
2896 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2897                     1) // __kmpc_atomic_cmplx16_add_cpt
2898 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2899                     1) // __kmpc_atomic_cmplx16_sub_cpt
2900 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2901                     1) // __kmpc_atomic_cmplx16_mul_cpt
2902 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2903                     1) // __kmpc_atomic_cmplx16_div_cpt
2904 #if (KMP_ARCH_X86)
2905 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2906                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2907 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2908                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2909 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2910                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2911 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2912                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2913 #endif // (KMP_ARCH_X86)
2914 #endif // KMP_HAVE_QUAD
2915 
2916 // OpenMP 4.0 capture forms: v = x = expr binop x;  { v = x; x = expr binop x; }
2917 // and { x = expr binop x; v = x; }  for non-commutative operations.
2918 // Supported only on IA-32 architecture and Intel(R) 64
2919 
2920 // -------------------------------------------------------------------------
2921 // Operation on *lhs, rhs bound by critical section
2922 //     OP     - operator (it's supposed to contain an assignment)
2923 //     LCK_ID - lock identifier
2924 // Note: don't check gtid as it should always be valid
2925 // 1-, 2-byte: a valid parameter is expected; others: check before this macro
2926 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
2927   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2928                                                                                \
2929   if (flag) {                                                                  \
2930     /*temp_val = (*lhs);*/                                                     \
2931     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2932     new_value = (*lhs);                                                        \
2933   } else {                                                                     \
2934     new_value = (*lhs);                                                        \
2935     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2936   }                                                                            \
2937   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2938   return new_value;
2939 
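// Worked example of the reversed operand order: for sub_cpt_rev the update is
// (*lhs) = rhs - (*lhs); with *lhs == 3 and rhs == 10 the new value is 7,
// returned when flag != 0, while flag == 0 returns the old 3.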
2940 // ------------------------------------------------------------------------
2941 #ifdef KMP_GOMP_COMPAT
2942 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
2943   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2944     KMP_CHECK_GTID;                                                            \
2945     OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
2946   }
2947 #else
2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2949 #endif /* KMP_GOMP_COMPAT */
2950 
2951 // ------------------------------------------------------------------------
2952 // Operation on *lhs, rhs using "compare_and_store" routine
2953 //     TYPE    - operands' type
2954 //     BITS    - size in bits, used to distinguish low level calls
2955 //     OP      - operator
2956 // Note: temp_val introduced in order to force the compiler to read
2957 //       *lhs only once (w/o it the compiler reads *lhs twice)
2958 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2959   {                                                                            \
2960     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2961     TYPE old_value, new_value;                                                 \
2962     temp_val = *lhs;                                                           \
2963     old_value = temp_val;                                                      \
2964     new_value = (TYPE)(rhs OP old_value);                                      \
2965     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2966         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2967         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2968       temp_val = *lhs;                                                         \
2969       old_value = temp_val;                                                    \
2970       new_value = (TYPE)(rhs OP old_value);                                    \
2971     }                                                                          \
2972     if (flag) {                                                                \
2973       return new_value;                                                        \
2974     } else                                                                     \
2975       return old_value;                                                        \
2976   }
2977 
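// Note: this differs from OP_CMPXCHG_CPT above only in operand order; every
// retry recomputes new_value = (TYPE)(rhs OP old_value), e.g. rhs / old_value
// for the div_cpt_rev entries below.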
2978 // -------------------------------------------------------------------------
2979 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2980   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2981   TYPE new_value;                                                              \
2982   (void)new_value;                                                             \
2983   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
2984   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2985   }
2986 
2987 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2988                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2989 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2990                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2991 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2992                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2994                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2996                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2998                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3000                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3001 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3002                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3003 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3004                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3006                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3008                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3010                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3011 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3012                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3013 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3014                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3015 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3016                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3018                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3020                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3022                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3023 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3024                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3025 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3026                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3027 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3028                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3030                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3032                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3034                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3035 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3036                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3037 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3038                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3039 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3040                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3041 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3042                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3043 //              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG
3044 
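// For reference, a compiler is expected to lower a reversed captured update
// such as
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }
// into a call along these lines (illustrative; flag as in OP_CMPXCHG_CPT_REV
// above, nonzero captures the new value, zero the old one):
//   v = __kmpc_atomic_fixed4_sub_cpt_rev(&loc, gtid, &x, expr, 0);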
3045 // ------------------------------------------------------------------------
3046 // Routines for Extended types: long double, _Quad, complex flavours (use
3047 // critical section)
3048 //     TYPE_ID, OP_ID, TYPE - detailed above
3049 //     OP      - operator
3050 //     LCK_ID  - lock identifier, used to distinguish the lock variable if needed
3051 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
3052   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
3053   TYPE new_value;                                                              \
3054   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
3055   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3056   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
3057   }
3058 
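// OP_CRITICAL_CPT_REV is defined earlier in this file; conceptually the
// locked path is expected to look roughly like this sketch (not the literal
// macro body):
//
//   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
//   if (flag) {
//     (*lhs) = (TYPE)((rhs)OP(*lhs)); // update first ...
//     new_value = (*lhs);             // ... then capture the new value
//   } else {
//     new_value = (*lhs);             // capture the old value first
//     (*lhs) = (TYPE)((rhs)OP(*lhs));
//   }
//   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
//   return new_value;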
3059 /* ------------------------------------------------------------------------- */
3060 // routines for long double type
3061 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3062                         1) // __kmpc_atomic_float10_sub_cpt_rev
3063 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3064                         1) // __kmpc_atomic_float10_div_cpt_rev
3065 #if KMP_HAVE_QUAD
3066 // routines for _Quad type
3067 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3068                         1) // __kmpc_atomic_float16_sub_cpt_rev
3069 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3070                         1) // __kmpc_atomic_float16_div_cpt_rev
3071 #if (KMP_ARCH_X86)
3072 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3073                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3074 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3075                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3076 #endif // (KMP_ARCH_X86)
3077 #endif // KMP_HAVE_QUAD
3078 
3079 // routines for complex types
3080 
3081 // ------------------------------------------------------------------------
3082 // Workaround for cmplx4. Regular routines with a return value don't work
3083 // on Win_32e, so the captured value is returned through an additional parameter.
3084 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3085   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3086                                                                                \
3087   if (flag) {                                                                  \
3088     (*lhs) = (rhs)OP(*lhs);                                                    \
3089     (*out) = (*lhs);                                                           \
3090   } else {                                                                     \
3091     (*out) = (*lhs);                                                           \
3092     (*lhs) = (rhs)OP(*lhs);                                                    \
3093   }                                                                            \
3094                                                                                \
3095   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3096   return;
3097 // ------------------------------------------------------------------------
3098 
3099 #ifdef KMP_GOMP_COMPAT
3100 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3101   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3102     KMP_CHECK_GTID;                                                            \
3103     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3104   }
3105 #else
3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3107 #endif /* KMP_GOMP_COMPAT */
3108 // ------------------------------------------------------------------------
3109 
3110 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3111                                     GOMP_FLAG)                                 \
3112   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3113   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3114   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3115   }
3116 // The end of workaround for cmplx4
3117 
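// Assuming ATOMIC_BEGIN_WRK (defined earlier in this file) emits a void
// entry point that returns the captured value through an extra parameter,
// the cmplx4 routines below should have roughly this shape (illustrative):
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag);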
3118 // !!! TODO: check if we need to return void for cmplx4 routines
3119 // cmplx4 routines to return void
3120 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3121                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3122 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3123                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
3124 
3125 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3126                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3127 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3128                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3129 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3130                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3131 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3132                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3133 #if KMP_HAVE_QUAD
3134 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3135                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3136 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3137                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3138 #if (KMP_ARCH_X86)
3139 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3140                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3141 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3142                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3143 #endif // (KMP_ARCH_X86)
3144 #endif // KMP_HAVE_QUAD
3145 
3146 // Capture reverse for mixed type: RHS=float16
3147 #if KMP_HAVE_QUAD
3148 
3149 // Beginning of a definition (provides name, parameters, debug trace)
3150 //     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
3151 //     fixed)
3152 //     OP_ID   - operation identifier (add, sub, mul, ...)
3153 //     TYPE    - operands' type
3154 // -------------------------------------------------------------------------
3155 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3156                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3157   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3158   TYPE new_value;                                                              \
3159   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
3160   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3161   }
3162 
3163 // -------------------------------------------------------------------------
3164 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3165                                     LCK_ID, GOMP_FLAG)                         \
3166   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3167   TYPE new_value;                                                              \
3168   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
3169   OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
3170   }
3171 
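// The mixed-type entry points take the LHS in the narrow type and the RHS in
// the wider type (here _Quad), assuming ATOMIC_BEGIN_CPT_MIX (defined
// earlier) declares the RHS parameter as RTYPE. Illustrative use:
//   char x; _Quad q; char v;
//   // #pragma omp atomic capture { v = x; x = q - x; }
//   v = __kmpc_atomic_fixed1_sub_cpt_rev_fp(&loc, gtid, &x, q, 0);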
3172 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3173                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3174 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3175                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3176 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3177                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3179                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3180 
3181 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3182                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3183 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3184                            1,
3185                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3187                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3189                            1,
3190                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3191 
3192 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3193                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3195                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3197                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3199                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3200 
3201 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3202                            7,
3203                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3205                            8i, 7,
3206                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3208                            7,
3209                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3211                            8i, 7,
3212                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3213 
3214 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3215                            4r, 3,
3216                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3217 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3218                            4r, 3,
3219                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3220 
3221 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3222                            8r, 7,
3223                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3224 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3225                            8r, 7,
3226                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3227 
3228 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3229                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3230 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3231                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3232 
3233 #endif // KMP_HAVE_QUAD
3234 
3235 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3236 
3237 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3238   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3239                                      TYPE rhs) {                               \
3240     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3241     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3242 
3243 #define CRITICAL_SWP(LCK_ID)                                                   \
3244   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3245                                                                                \
3246   old_value = (*lhs);                                                          \
3247   (*lhs) = rhs;                                                                \
3248                                                                                \
3249   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3250   return old_value;
3251 
3252 // ------------------------------------------------------------------------
3253 #ifdef KMP_GOMP_COMPAT
3254 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3255   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3256     KMP_CHECK_GTID;                                                            \
3257     CRITICAL_SWP(0);                                                           \
3258   }
3259 #else
3260 #define GOMP_CRITICAL_SWP(FLAG)
3261 #endif /* KMP_GOMP_COMPAT */
3262 
3263 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3264   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3265   TYPE old_value;                                                              \
3266   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3267   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3268   return old_value;                                                            \
3269   }
3270 // ------------------------------------------------------------------------
3271 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3272   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3273   TYPE old_value;                                                              \
3274   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3275   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3276   return old_value;                                                            \
3277   }
3278 
3279 // ------------------------------------------------------------------------
3280 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3281   {                                                                            \
3282     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3283     TYPE old_value, new_value;                                                 \
3284     temp_val = *lhs;                                                           \
3285     old_value = temp_val;                                                      \
3286     new_value = rhs;                                                           \
3287     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3288         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3289         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3290       temp_val = *lhs;                                                         \
3291       old_value = temp_val;                                                    \
3292       new_value = rhs;                                                         \
3293     }                                                                          \
3294     return old_value;                                                          \
3295   }
3296 
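// CMPXCHG_SWP emulates an atomic exchange with a compare-and-store retry
// loop. This is used below for 8-byte operands on 32-bit x86, where the
// exchange presumably has to be built from a 64-bit compare-and-store
// (cmpxchg8b) rather than a plain 64-bit XCHG.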
3297 // -------------------------------------------------------------------------
3298 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3299   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3300   TYPE old_value;                                                              \
3301   (void)old_value;                                                             \
3302   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3303   CMPXCHG_SWP(TYPE, BITS)                                                      \
3304   }
3305 
3306 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3307 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3308 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3309 
3310 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3311                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3312 
3313 #if (KMP_ARCH_X86)
3314 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3315                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3316 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3317                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3318 #else
3319 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3320 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3321                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3322 #endif // (KMP_ARCH_X86)
3323 
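// For reference, a capture-write (swap) such as
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// is expected to be lowered to one of the swap entry points above, e.g.
// (illustrative):
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);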
3324 // ------------------------------------------------------------------------
3325 // Routines for Extended types: long double, _Quad, complex flavours (use
3326 // critical section)
3327 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3328   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3329   TYPE old_value;                                                              \
3330   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3331   CRITICAL_SWP(LCK_ID)                                                         \
3332   }
3333 
3334 // ------------------------------------------------------------------------
3335 // !!! TODO: check if we need to return void for cmplx4 routines
3336 // Workaround for cmplx4. Regular routines with a return value don't work
3337 // on Win_32e, so the captured value is returned through an additional parameter.
3338 
3339 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3340   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3341                                      TYPE rhs, TYPE *out) {                    \
3342     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3343     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3344 
3345 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3346   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3347                                                                                \
3348   tmp = (*lhs);                                                                \
3349   (*lhs) = (rhs);                                                              \
3350   (*out) = tmp;                                                                \
3351   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3352   return;
3353 // ------------------------------------------------------------------------
3354 
3355 #ifdef KMP_GOMP_COMPAT
3356 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3357   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3358     KMP_CHECK_GTID;                                                            \
3359     CRITICAL_SWP_WRK(0);                                                       \
3360   }
3361 #else
3362 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3363 #endif /* KMP_GOMP_COMPAT */
3364 // ------------------------------------------------------------------------
3365 
3366 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3367   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3368   TYPE tmp;                                                                    \
3369   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3370   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3371   }
3372 // The end of workaround for cmplx4
3373 
3374 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3375 #if KMP_HAVE_QUAD
3376 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3377 #endif // KMP_HAVE_QUAD
3378 // cmplx4 routine to return void
3379 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3380 
3381 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
3382 // __kmpc_atomic_cmplx4_swp
3383 
3384 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3385 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3386 #if KMP_HAVE_QUAD
3387 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3388 #if (KMP_ARCH_X86)
3389 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3390                     1) // __kmpc_atomic_float16_a16_swp
3391 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3392                     1) // __kmpc_atomic_cmplx16_a16_swp
3393 #endif // (KMP_ARCH_X86)
3394 #endif // KMP_HAVE_QUAD
3395 
3396 // End of OpenMP 4.0 Capture
3397 
3398 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3399 
3400 #undef OP_CRITICAL
3401 
3402 /* ------------------------------------------------------------------------ */
3403 /* Generic atomic routines                                                  */
3404 
3405 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3406                      void (*f)(void *, void *, void *)) {
3407   KMP_DEBUG_ASSERT(__kmp_init_serial);
3408 
3409   if (
3410 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3411       FALSE /* must use lock */
3412 #else
3413       TRUE
3414 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3415   ) {
3416     kmp_int8 old_value, new_value;
3417 
3418     old_value = *(kmp_int8 *)lhs;
3419     (*f)(&new_value, &old_value, rhs);
3420 
3421     /* TODO: Should this be acquire or release? */
3422     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3423                                        *(kmp_int8 *)&new_value)) {
3424       KMP_CPU_PAUSE();
3425 
3426       old_value = *(kmp_int8 *)lhs;
3427       (*f)(&new_value, &old_value, rhs);
3428     }
3429 
3430     return;
3431   } else {
3432     // All 1-byte data is of integer data type.
3433 
3434 #ifdef KMP_GOMP_COMPAT
3435     if (__kmp_atomic_mode == 2) {
3436       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3437     } else
3438 #endif /* KMP_GOMP_COMPAT */
3439       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3440 
3441     (*f)(lhs, lhs, rhs);
3442 
3443 #ifdef KMP_GOMP_COMPAT
3444     if (__kmp_atomic_mode == 2) {
3445       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3446     } else
3447 #endif /* KMP_GOMP_COMPAT */
3448       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3449   }
3450 }
3451 
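// Illustrative use of the generic entry points: the compiler passes a
// callback that computes "(*out) = (*a) op (*b)"; note that rhs is passed
// by address. All names below are hypothetical, for exposition only:
//
//   static void op_add_i8(void *out, void *a, void *b) {
//     *(kmp_int8 *)out = *(kmp_int8 *)a + *(kmp_int8 *)b;
//   }
//   // ... later, to perform "x += rhs" atomically:
//   __kmpc_atomic_1(&loc, gtid, &x, &rhs, op_add_i8);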
3452 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3453                      void (*f)(void *, void *, void *)) {
3454   if (
3455 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3456       FALSE /* must use lock */
3457 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3458       TRUE /* no alignment problems */
3459 #else
3460       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3461 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3462   ) {
3463     kmp_int16 old_value, new_value;
3464 
3465     old_value = *(kmp_int16 *)lhs;
3466     (*f)(&new_value, &old_value, rhs);
3467 
3468     /* TODO: Should this be acquire or release? */
3469     while (!KMP_COMPARE_AND_STORE_ACQ16(
3470         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3471       KMP_CPU_PAUSE();
3472 
3473       old_value = *(kmp_int16 *)lhs;
3474       (*f)(&new_value, &old_value, rhs);
3475     }
3476 
3477     return;
3478   } else {
3479     // All 2-byte data is of integer data type.
3480 
3481 #ifdef KMP_GOMP_COMPAT
3482     if (__kmp_atomic_mode == 2) {
3483       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3484     } else
3485 #endif /* KMP_GOMP_COMPAT */
3486       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3487 
3488     (*f)(lhs, lhs, rhs);
3489 
3490 #ifdef KMP_GOMP_COMPAT
3491     if (__kmp_atomic_mode == 2) {
3492       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3493     } else
3494 #endif /* KMP_GOMP_COMPAT */
3495       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3496   }
3497 }
3498 
3499 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3500                      void (*f)(void *, void *, void *)) {
3501   KMP_DEBUG_ASSERT(__kmp_init_serial);
3502 
3503   if (
3504 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3505 // Gomp compatibility is broken if this routine is called for floats.
3506 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3507       TRUE /* no alignment problems */
3508 #else
3509       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3510 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3511   ) {
3512     kmp_int32 old_value, new_value;
3513 
3514     old_value = *(kmp_int32 *)lhs;
3515     (*f)(&new_value, &old_value, rhs);
3516 
3517     /* TODO: Should this be acquire or release? */
3518     while (!KMP_COMPARE_AND_STORE_ACQ32(
3519         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3520       KMP_CPU_PAUSE();
3521 
3522       old_value = *(kmp_int32 *)lhs;
3523       (*f)(&new_value, &old_value, rhs);
3524     }
3525 
3526     return;
3527   } else {
3528     // Use __kmp_atomic_lock_4i for all 4-byte data,
3529     // even if it isn't of integer data type.
3530 
3531 #ifdef KMP_GOMP_COMPAT
3532     if (__kmp_atomic_mode == 2) {
3533       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3534     } else
3535 #endif /* KMP_GOMP_COMPAT */
3536       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3537 
3538     (*f)(lhs, lhs, rhs);
3539 
3540 #ifdef KMP_GOMP_COMPAT
3541     if (__kmp_atomic_mode == 2) {
3542       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3543     } else
3544 #endif /* KMP_GOMP_COMPAT */
3545       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3546   }
3547 }
3548 
3549 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3550                      void (*f)(void *, void *, void *)) {
3551   KMP_DEBUG_ASSERT(__kmp_init_serial);
3552   if (
3553 
3554 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3555       FALSE /* must use lock */
3556 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3557       TRUE /* no alignment problems */
3558 #else
3559       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3560 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3561   ) {
3562     kmp_int64 old_value, new_value;
3563 
3564     old_value = *(kmp_int64 *)lhs;
3565     (*f)(&new_value, &old_value, rhs);
3566     /* TODO: Should this be acquire or release? */
3567     while (!KMP_COMPARE_AND_STORE_ACQ64(
3568         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3569       KMP_CPU_PAUSE();
3570 
3571       old_value = *(kmp_int64 *)lhs;
3572       (*f)(&new_value, &old_value, rhs);
3573     }
3574 
3575     return;
3576   } else {
3577     // Use __kmp_atomic_lock_8i for all 8-byte data,
3578     // even if it isn't of integer data type.
3579 
3580 #ifdef KMP_GOMP_COMPAT
3581     if (__kmp_atomic_mode == 2) {
3582       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3583     } else
3584 #endif /* KMP_GOMP_COMPAT */
3585       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3586 
3587     (*f)(lhs, lhs, rhs);
3588 
3589 #ifdef KMP_GOMP_COMPAT
3590     if (__kmp_atomic_mode == 2) {
3591       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3592     } else
3593 #endif /* KMP_GOMP_COMPAT */
3594       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3595   }
3596 }
3597 
3598 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3599                       void (*f)(void *, void *, void *)) {
3600   KMP_DEBUG_ASSERT(__kmp_init_serial);
3601 
3602 #ifdef KMP_GOMP_COMPAT
3603   if (__kmp_atomic_mode == 2) {
3604     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3605   } else
3606 #endif /* KMP_GOMP_COMPAT */
3607     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3608 
3609   (*f)(lhs, lhs, rhs);
3610 
3611 #ifdef KMP_GOMP_COMPAT
3612   if (__kmp_atomic_mode == 2) {
3613     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3614   } else
3615 #endif /* KMP_GOMP_COMPAT */
3616     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3617 }
3618 
3619 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3620                       void (*f)(void *, void *, void *)) {
3621   KMP_DEBUG_ASSERT(__kmp_init_serial);
3622 
3623 #ifdef KMP_GOMP_COMPAT
3624   if (__kmp_atomic_mode == 2) {
3625     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3626   } else
3627 #endif /* KMP_GOMP_COMPAT */
3628     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3629 
3630   (*f)(lhs, lhs, rhs);
3631 
3632 #ifdef KMP_GOMP_COMPAT
3633   if (__kmp_atomic_mode == 2) {
3634     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3635   } else
3636 #endif /* KMP_GOMP_COMPAT */
3637     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3638 }
3639 
3640 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3641                       void (*f)(void *, void *, void *)) {
3642   KMP_DEBUG_ASSERT(__kmp_init_serial);
3643 
3644 #ifdef KMP_GOMP_COMPAT
3645   if (__kmp_atomic_mode == 2) {
3646     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3647   } else
3648 #endif /* KMP_GOMP_COMPAT */
3649     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3650 
3651   (*f)(lhs, lhs, rhs);
3652 
3653 #ifdef KMP_GOMP_COMPAT
3654   if (__kmp_atomic_mode == 2) {
3655     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3656   } else
3657 #endif /* KMP_GOMP_COMPAT */
3658     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3659 }
3660 
3661 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3662                       void (*f)(void *, void *, void *)) {
3663   KMP_DEBUG_ASSERT(__kmp_init_serial);
3664 
3665 #ifdef KMP_GOMP_COMPAT
3666   if (__kmp_atomic_mode == 2) {
3667     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3668   } else
3669 #endif /* KMP_GOMP_COMPAT */
3670     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3671 
3672   (*f)(lhs, lhs, rhs);
3673 
3674 #ifdef KMP_GOMP_COMPAT
3675   if (__kmp_atomic_mode == 2) {
3676     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3677   } else
3678 #endif /* KMP_GOMP_COMPAT */
3679     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3680 }
3681 
3682 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3683 // compiler; duplicated so as not to use third-party names in pure Intel code
3684 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3685 void __kmpc_atomic_start(void) {
3686   int gtid = __kmp_entry_gtid();
3687   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3688   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3689 }
3690 
3691 void __kmpc_atomic_end(void) {
3692   int gtid = __kmp_get_gtid();
3693   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3694   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3695 }
3696 
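// Illustrative use: a compiler may serialize an atomic construct it cannot
// map to a specialized entry point by bracketing the update, e.g.
//   __kmpc_atomic_start();
//   x = big_expression(x);
//   __kmpc_atomic_end();
// (big_expression is hypothetical, for exposition only).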
3697 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3698 
3699 // OpenMP 5.1 compare and swap
3700 
3701 /*!
3702 @param loc Source code location
3703 @param gtid Global thread id
3704 @param x Memory location to operate on
3705 @param e Expected value
3706 @param d Desired value
3707 @return Result of comparison
3708 
3709 Implements Compare And Swap atomic operation.
3710 
3711 Sample code:
3712 #pragma omp atomic compare update capture
3713   { r = x == e; if(r) { x = d; } }
3714 */
3715 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3716   return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3717 }
3718 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3719                               short d) {
3720   return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3721 }
3722 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3723                               kmp_int32 d) {
3724   return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3725 }
3726 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3727                               kmp_int64 d) {
3728   return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3729 }
3730 
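// Illustrative lowering of the boolean form above (names assumed):
//   kmp_int32 x, e, d; bool r;
//   // #pragma omp atomic compare update capture
//   // { r = x == e; if (r) { x = d; } }
//   r = __kmpc_atomic_bool_4_cas(&loc, gtid, &x, e, d);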
3731 /*!
3732 @param loc Source code location
3733 @param gtid Global thread id
3734 @param x Memory location to operate on
3735 @param e Expected value
3736 @param d Desired value
3737 @return Old value of x
3738 
3739 Implements Compare And Swap atomic operation.
3740 
3741 Sample code:
3742 #pragma omp atomic compare update capture
3743   { v = x; if (x == e) { x = d; } }
3744 */
3745 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3746   return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3747 }
3748 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3749                               short d) {
3750   return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3751 }
3752 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3753                                   kmp_int32 e, kmp_int32 d) {
3754   return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3755 }
3756 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3757                                   kmp_int64 e, kmp_int64 d) {
3758   return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3759 }
3760 
3761 /*!
3762 @param loc Source code location
3763 @param gtid Global thread id
3764 @param x Memory location to operate on
3765 @param e Expected value
3766 @param d Desired value
3767 @param pv Captured value location
3768 @return Result of comparison
3769 
3770 Implements Compare And Swap + Capture atomic operation.
3771 
3772 v gets old valie of x if comparison failed, untouched otherwise.
3773 Sample code:
3774 #pragma omp atomic compare update capture
3775   { r = x == e; if(r) { x = d; } else { v = x; } }
3776 */
3777 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3778                                   char d, char *pv) {
3779   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3780   if (old == e)
3781     return true;
3782   KMP_ASSERT(pv != NULL);
3783   *pv = old;
3784   return false;
3785 }
3786 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3787                                   short d, short *pv) {
3788   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3789   if (old == e)
3790     return true;
3791   KMP_ASSERT(pv != NULL);
3792   *pv = old;
3793   return false;
3794 }
3795 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3796                                   kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3797   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3798   if (old == e)
3799     return true;
3800   KMP_ASSERT(pv != NULL);
3801   *pv = old;
3802   return false;
3803 }
3804 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3805                                   kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3806   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3807   if (old == e)
3808     return true;
3809   KMP_ASSERT(pv != NULL);
3810   *pv = old;
3811   return false;
3812 }
3813 
3814 /*!
3815 @param loc Source code location
3816 @param gtid Global thread id
3817 @param x Memory location to operate on
3818 @param e Expected value
3819 @param d Desired value
3820 @param pv Captured value location
3821 @return Old value of x
3822 
3823 Implements Compare And Swap + Capture atomic operation.
3824 
3825 v gets new valie of x.
3826 Sample code:
3827 #pragma omp atomic compare update capture
3828   { if (x == e) { x = d; }; v = x; }
3829 */
3830 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3831                                  char d, char *pv) {
3832   char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3833   KMP_ASSERT(pv != NULL);
3834   *pv = old == e ? d : old;
3835   return old;
3836 }
3837 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3838                                   short d, short *pv) {
3839   short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3840   KMP_ASSERT(pv != NULL);
3841   *pv = old == e ? d : old;
3842   return old;
3843 }
3844 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3845                                       kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3846   kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3847   KMP_ASSERT(pv != NULL);
3848   *pv = old == e ? d : old;
3849   return old;
3850 }
3851 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3852                                       kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3853   kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3854   KMP_ASSERT(pv != NULL);
3855   *pv = old == e ? d : old;
3856   return old;
3857 }
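// Note on the capture above: KMP_COMPARE_AND_STORE_RET* returns the prior
// contents of x, so "old == e ? d : old" is the value x holds *after* the
// conditional store -- d when the swap happened, the unchanged value
// otherwise -- matching the "v = x;" in the sample code.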
3858 
3859 // End OpenMP 5.1 compare + capture
3860 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3861 
3862 /*!
3863 @}
3864 */
3865 
3866 // end of file
3867