1 /* 2 * kmp_atomic.cpp -- ATOMIC implementation routines 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "kmp_atomic.h" 14 #include "kmp.h" // TRUE, asm routines prototypes 15 16 typedef unsigned char uchar; 17 typedef unsigned short ushort; 18 19 /*! 20 @defgroup ATOMIC_OPS Atomic Operations 21 These functions are used for implementing the many different varieties of atomic 22 operations. 23 24 The compiler is at liberty to inline atomic operations that are naturally 25 supported by the target architecture. For instance on IA-32 architecture an 26 atomic like this can be inlined 27 @code 28 static int s = 0; 29 #pragma omp atomic 30 s++; 31 @endcode 32 using the single instruction: `lock; incl s` 33 34 However the runtime does provide entrypoints for these operations to support 35 compilers that choose not to inline them. (For instance, 36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.) 37 38 The names of the functions are encoded by using the data type name and the 39 operation name, as in these tables. 40 41 Data Type | Data type encoding 42 -----------|--------------- 43 int8_t | `fixed1` 44 uint8_t | `fixed1u` 45 int16_t | `fixed2` 46 uint16_t | `fixed2u` 47 int32_t | `fixed4` 48 uint32_t | `fixed4u` 49 int32_t | `fixed8` 50 uint32_t | `fixed8u` 51 float | `float4` 52 double | `float8` 53 float 10 (8087 eighty bit float) | `float10` 54 complex<float> | `cmplx4` 55 complex<double> | `cmplx8` 56 complex<float10> | `cmplx10` 57 <br> 58 59 Operation | Operation encoding 60 ----------|------------------- 61 + | add 62 - | sub 63 \* | mul 64 / | div 65 & | andb 66 << | shl 67 \>\> | shr 68 \| | orb 69 ^ | xor 70 && | andl 71 \|\| | orl 72 maximum | max 73 minimum | min 74 .eqv. | eqv 75 .neqv. | neqv 76 77 <br> 78 For non-commutative operations, `_rev` can also be added for the reversed 79 operation. For the functions that capture the result, the suffix `_cpt` is 80 added. 81 82 Update Functions 83 ================ 84 The general form of an atomic function that just performs an update (without a 85 `capture`) 86 @code 87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * 88 lhs, TYPE rhs ); 89 @endcode 90 @param ident_t a pointer to source location 91 @param gtid the global thread id 92 @param lhs a pointer to the left operand 93 @param rhs the right operand 94 95 `capture` functions 96 =================== 97 The capture functions perform an atomic update and return a result, which is 98 either the value before the capture, or that after. They take an additional 99 argument to determine which result is returned. 100 Their general form is therefore 101 @code 102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * 103 lhs, TYPE rhs, int flag ); 104 @endcode 105 @param ident_t a pointer to source location 106 @param gtid the global thread id 107 @param lhs a pointer to the left operand 108 @param rhs the right operand 109 @param flag one if the result is to be captured *after* the operation, zero if 110 captured *before*. 111 112 The one set of exceptions to this is the `complex<float>` type where the value 113 is not returned, rather an extra argument pointer is passed. 114 115 They look like 116 @code 117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119 @endcode 120 121 Read and Write Operations 122 ========================= 123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124 ensure that the value is read or written atomically, with no modification 125 performed. In many cases on IA-32 architecture these operations can be inlined 126 since the architecture guarantees that no tearing occurs on aligned objects 127 accessed with a single memory operation of up to 64 bits in size. 128 129 The general form of the read operations is 130 @code 131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132 @endcode 133 134 For the write operations the form is 135 @code 136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137 ); 138 @endcode 139 140 Full list of functions 141 ====================== 142 This leads to the generation of 376 atomic functions, as follows. 143 144 Functions for integers 145 --------------------- 146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and 147 unsigned (where that matters). 148 @code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 __kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215 __kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 __kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385 @endcode 386 387 Functions for floating point 388 ---------------------------- 389 There are versions here for floating point numbers of size 4, 8, 10 and 16 390 bytes. (Ten byte floats are used by X87, but are now rare). 391 @code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 __kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480 @endcode 481 482 Functions for Complex types 483 --------------------------- 484 Functions for complex types whose component floating point variables are of size 485 4,8,10 or 16 bytes. The names here are based on the size of the component float, 486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an 487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 488 489 @code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 __kmpc_atomic_cmplx16_swp 552 __kmpc_atomic_cmplx16_wr 553 @endcode 554 */ 555 556 /*! 557 @ingroup ATOMIC_OPS 558 @{ 559 */ 560 561 /* 562 * Global vars 563 */ 564 565 #ifndef KMP_GOMP_COMPAT 566 int __kmp_atomic_mode = 1; // Intel perf 567 #else 568 int __kmp_atomic_mode = 2; // GOMP compatibility 569 #endif /* KMP_GOMP_COMPAT */ 570 571 KMP_ALIGN(128) 572 573 // Control access to all user coded atomics in Gnu compat mode 574 kmp_atomic_lock_t __kmp_atomic_lock; 575 // Control access to all user coded atomics for 1-byte fixed data types 576 kmp_atomic_lock_t __kmp_atomic_lock_1i; 577 // Control access to all user coded atomics for 2-byte fixed data types 578 kmp_atomic_lock_t __kmp_atomic_lock_2i; 579 // Control access to all user coded atomics for 4-byte fixed data types 580 kmp_atomic_lock_t __kmp_atomic_lock_4i; 581 // Control access to all user coded atomics for kmp_real32 data type 582 kmp_atomic_lock_t __kmp_atomic_lock_4r; 583 // Control access to all user coded atomics for 8-byte fixed data types 584 kmp_atomic_lock_t __kmp_atomic_lock_8i; 585 // Control access to all user coded atomics for kmp_real64 data type 586 kmp_atomic_lock_t __kmp_atomic_lock_8r; 587 // Control access to all user coded atomics for complex byte data type 588 kmp_atomic_lock_t __kmp_atomic_lock_8c; 589 // Control access to all user coded atomics for long double data type 590 kmp_atomic_lock_t __kmp_atomic_lock_10r; 591 // Control access to all user coded atomics for _Quad data type 592 kmp_atomic_lock_t __kmp_atomic_lock_16r; 593 // Control access to all user coded atomics for double complex data type 594 kmp_atomic_lock_t __kmp_atomic_lock_16c; 595 // Control access to all user coded atomics for long double complex type 596 kmp_atomic_lock_t __kmp_atomic_lock_20c; 597 // Control access to all user coded atomics for _Quad complex data type 598 kmp_atomic_lock_t __kmp_atomic_lock_32c; 599 600 /* 2007-03-02: 601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug 602 on *_32 and *_32e. This is just a temporary workaround for the problem. It 603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines 604 in assembler language. */ 605 #define KMP_ATOMIC_VOLATILE volatile 606 607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD 608 609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) { 610 return lhs.q + rhs.q; 611 } 612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) { 613 return lhs.q - rhs.q; 614 } 615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) { 616 return lhs.q * rhs.q; 617 } 618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) { 619 return lhs.q / rhs.q; 620 } 621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) { 622 return lhs.q < rhs.q; 623 } 624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) { 625 return lhs.q > rhs.q; 626 } 627 628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) { 629 return lhs.q + rhs.q; 630 } 631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) { 632 return lhs.q - rhs.q; 633 } 634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) { 635 return lhs.q * rhs.q; 636 } 637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) { 638 return lhs.q / rhs.q; 639 } 640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) { 641 return lhs.q < rhs.q; 642 } 643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) { 644 return lhs.q > rhs.q; 645 } 646 647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs, 648 kmp_cmplx128_a4_t &rhs) { 649 return lhs.q + rhs.q; 650 } 651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs, 652 kmp_cmplx128_a4_t &rhs) { 653 return lhs.q - rhs.q; 654 } 655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs, 656 kmp_cmplx128_a4_t &rhs) { 657 return lhs.q * rhs.q; 658 } 659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs, 660 kmp_cmplx128_a4_t &rhs) { 661 return lhs.q / rhs.q; 662 } 663 664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs, 665 kmp_cmplx128_a16_t &rhs) { 666 return lhs.q + rhs.q; 667 } 668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs, 669 kmp_cmplx128_a16_t &rhs) { 670 return lhs.q - rhs.q; 671 } 672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs, 673 kmp_cmplx128_a16_t &rhs) { 674 return lhs.q * rhs.q; 675 } 676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, 677 kmp_cmplx128_a16_t &rhs) { 678 return lhs.q / rhs.q; 679 } 680 681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD 682 683 // ATOMIC implementation routines ----------------------------------------- 684 // One routine for each operation and operand type. 685 // All routines declarations looks like 686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); 687 688 #define KMP_CHECK_GTID \ 689 if (gtid == KMP_GTID_UNKNOWN) { \ 690 gtid = __kmp_entry_gtid(); \ 691 } // check and get gtid when needed 692 693 // Beginning of a definition (provides name, parameters, gebug trace) 694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 695 // fixed) 696 // OP_ID - operation identifier (add, sub, mul, ...) 697 // TYPE - operands' type 698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 699 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 700 TYPE *lhs, TYPE rhs) { \ 701 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 702 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 703 704 // ------------------------------------------------------------------------ 705 // Lock variables used for critical sections for various size operands 706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat 707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char 708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short 709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int 710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float 711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int 712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double 713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex 714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double 715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad 716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex 717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex 718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex 719 720 // ------------------------------------------------------------------------ 721 // Operation on *lhs, rhs bound by critical section 722 // OP - operator (it's supposed to contain an assignment) 723 // LCK_ID - lock identifier 724 // Note: don't check gtid as it should always be valid 725 // 1, 2-byte - expect valid parameter, other - check before this macro 726 #define OP_CRITICAL(OP, LCK_ID) \ 727 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 728 \ 729 (*lhs) OP(rhs); \ 730 \ 731 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 732 733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ 734 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 735 (*lhs) = (TYPE)((*lhs)OP rhs); \ 736 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 737 738 // ------------------------------------------------------------------------ 739 // For GNU compatibility, we may need to use a critical section, 740 // even though it is not required by the ISA. 741 // 742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add, 743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common 744 // critical section. On Intel(R) 64, all atomic operations are done with fetch 745 // and add or compare and exchange. Therefore, the FLAG parameter to this 746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which 747 // require a critical section, where we predict that they will be implemented 748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). 749 // 750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, 751 // the FLAG parameter should always be 1. If we know that we will be using 752 // a critical section, then we want to make certain that we use the generic 753 // lock __kmp_atomic_lock to protect the atomic update, and not of of the 754 // locks that are specialized based upon the size or type of the data. 755 // 756 // If FLAG is 0, then we are relying on dead code elimination by the build 757 // compiler to get rid of the useless block of code, and save a needless 758 // branch at runtime. 759 760 #ifdef KMP_GOMP_COMPAT 761 #define OP_GOMP_CRITICAL(OP, FLAG) \ 762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 763 KMP_CHECK_GTID; \ 764 OP_CRITICAL(OP, 0); \ 765 return; \ 766 } 767 768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \ 769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 770 KMP_CHECK_GTID; \ 771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \ 772 return; \ 773 } 774 #else 775 #define OP_GOMP_CRITICAL(OP, FLAG) 776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) 777 #endif /* KMP_GOMP_COMPAT */ 778 779 #if KMP_MIC 780 #define KMP_DO_PAUSE _mm_delay_32(1) 781 #else 782 #define KMP_DO_PAUSE 783 #endif /* KMP_MIC */ 784 785 // ------------------------------------------------------------------------ 786 // Operation on *lhs, rhs using "compare_and_store" routine 787 // TYPE - operands' type 788 // BITS - size in bits, used to distinguish low level calls 789 // OP - operator 790 #define OP_CMPXCHG(TYPE, BITS, OP) \ 791 { \ 792 TYPE old_value, new_value; \ 793 old_value = *(TYPE volatile *)lhs; \ 794 new_value = (TYPE)(old_value OP rhs); \ 795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 798 KMP_DO_PAUSE; \ 799 \ 800 old_value = *(TYPE volatile *)lhs; \ 801 new_value = (TYPE)(old_value OP rhs); \ 802 } \ 803 } 804 805 #if USE_CMPXCHG_FIX 806 // 2007-06-25: 807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile 809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the 810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of 811 // the workaround. 812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 813 { \ 814 struct _sss { \ 815 TYPE cmp; \ 816 kmp_int##BITS *vvv; \ 817 }; \ 818 struct _sss old_value, new_value; \ 819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 826 KMP_DO_PAUSE; \ 827 \ 828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 830 } \ 831 } 832 // end of the first part of the workaround for C78287 833 #endif // USE_CMPXCHG_FIX 834 835 #if KMP_OS_WINDOWS && KMP_ARCH_AARCH64 836 // Undo explicit type casts to get MSVC ARM64 to build. Uses 837 // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG 838 #undef OP_CMPXCHG 839 #define OP_CMPXCHG(TYPE, BITS, OP) \ 840 { \ 841 struct _sss { \ 842 TYPE cmp; \ 843 kmp_int##BITS *vvv; \ 844 }; \ 845 struct _sss old_value, new_value; \ 846 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 847 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 848 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 849 new_value.cmp = old_value.cmp OP rhs; \ 850 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 851 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 852 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 853 KMP_DO_PAUSE; \ 854 \ 855 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 856 new_value.cmp = old_value.cmp OP rhs; \ 857 } \ 858 } 859 860 #undef OP_UPDATE_CRITICAL 861 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ 862 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 863 (*lhs) = (*lhs)OP rhs; \ 864 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 865 866 #endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64 867 868 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 869 870 // ------------------------------------------------------------------------ 871 // X86 or X86_64: no alignment problems ==================================== 872 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 873 GOMP_FLAG) \ 874 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 875 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 876 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 877 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 878 } 879 // ------------------------------------------------------------------------- 880 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 881 GOMP_FLAG) \ 882 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 883 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 884 OP_CMPXCHG(TYPE, BITS, OP) \ 885 } 886 #if USE_CMPXCHG_FIX 887 // ------------------------------------------------------------------------- 888 // workaround for C78287 (complex(kind=4) data type) 889 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 890 MASK, GOMP_FLAG) \ 891 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 892 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 893 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 894 } 895 // end of the second part of the workaround for C78287 896 #endif // USE_CMPXCHG_FIX 897 898 #else 899 // ------------------------------------------------------------------------- 900 // Code for other architectures that don't handle unaligned accesses. 901 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 902 GOMP_FLAG) \ 903 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 904 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 905 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 906 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 907 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 908 } else { \ 909 KMP_CHECK_GTID; \ 910 OP_UPDATE_CRITICAL(TYPE, OP, \ 911 LCK_ID) /* unaligned address - use critical */ \ 912 } \ 913 } 914 // ------------------------------------------------------------------------- 915 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 916 GOMP_FLAG) \ 917 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 918 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 919 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 920 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 921 } else { \ 922 KMP_CHECK_GTID; \ 923 OP_UPDATE_CRITICAL(TYPE, OP, \ 924 LCK_ID) /* unaligned address - use critical */ \ 925 } \ 926 } 927 #if USE_CMPXCHG_FIX 928 // ------------------------------------------------------------------------- 929 // workaround for C78287 (complex(kind=4) data type) 930 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 931 MASK, GOMP_FLAG) \ 932 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 933 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 934 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 935 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 936 } else { \ 937 KMP_CHECK_GTID; \ 938 OP_UPDATE_CRITICAL(TYPE, OP, \ 939 LCK_ID) /* unaligned address - use critical */ \ 940 } \ 941 } 942 // end of the second part of the workaround for C78287 943 #endif // USE_CMPXCHG_FIX 944 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 945 946 // Routines for ATOMIC 4-byte operands addition and subtraction 947 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 948 0) // __kmpc_atomic_fixed4_add 949 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 950 0) // __kmpc_atomic_fixed4_sub 951 952 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, 953 KMP_ARCH_X86) // __kmpc_atomic_float4_add 954 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, 955 KMP_ARCH_X86) // __kmpc_atomic_float4_sub 956 957 // Routines for ATOMIC 8-byte operands addition and subtraction 958 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, 959 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add 960 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, 961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub 962 963 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, 964 KMP_ARCH_X86) // __kmpc_atomic_float8_add 965 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, 966 KMP_ARCH_X86) // __kmpc_atomic_float8_sub 967 968 // ------------------------------------------------------------------------ 969 // Entries definition for integer operands 970 // TYPE_ID - operands type and size (fixed4, float4) 971 // OP_ID - operation identifier (add, sub, mul, ...) 972 // TYPE - operand type 973 // BITS - size in bits, used to distinguish low level calls 974 // OP - operator (used in critical section) 975 // LCK_ID - lock identifier, used to possibly distinguish lock variable 976 // MASK - used for alignment check 977 978 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 979 // ------------------------------------------------------------------------ 980 // Routines for ATOMIC integer operands, other operators 981 // ------------------------------------------------------------------------ 982 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 983 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, 984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add 985 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 986 0) // __kmpc_atomic_fixed1_andb 987 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, 988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div 989 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, 990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div 991 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, 992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul 993 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 994 0) // __kmpc_atomic_fixed1_orb 995 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, 996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl 997 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, 998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr 999 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, 1000 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr 1001 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, 1002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub 1003 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 1004 0) // __kmpc_atomic_fixed1_xor 1005 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, 1006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add 1007 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 1008 0) // __kmpc_atomic_fixed2_andb 1009 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, 1010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div 1011 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, 1012 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div 1013 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, 1014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul 1015 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 1016 0) // __kmpc_atomic_fixed2_orb 1017 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, 1018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl 1019 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, 1020 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr 1021 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, 1022 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr 1023 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, 1024 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub 1025 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 1026 0) // __kmpc_atomic_fixed2_xor 1027 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 1028 0) // __kmpc_atomic_fixed4_andb 1029 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, 1030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div 1031 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, 1032 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div 1033 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, 1034 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul 1035 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 1036 0) // __kmpc_atomic_fixed4_orb 1037 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, 1038 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl 1039 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, 1040 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr 1041 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, 1042 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr 1043 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 1044 0) // __kmpc_atomic_fixed4_xor 1045 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, 1046 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb 1047 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, 1048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div 1049 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, 1050 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div 1051 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, 1052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul 1053 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, 1054 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb 1055 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, 1056 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl 1057 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, 1058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr 1059 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, 1060 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr 1061 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, 1062 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor 1063 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, 1064 KMP_ARCH_X86) // __kmpc_atomic_float4_div 1065 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, 1066 KMP_ARCH_X86) // __kmpc_atomic_float4_mul 1067 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, 1068 KMP_ARCH_X86) // __kmpc_atomic_float8_div 1069 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, 1070 KMP_ARCH_X86) // __kmpc_atomic_float8_mul 1071 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 1072 1073 /* ------------------------------------------------------------------------ */ 1074 /* Routines for C/C++ Reduction operators && and || */ 1075 1076 // ------------------------------------------------------------------------ 1077 // Need separate macros for &&, || because there is no combined assignment 1078 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 1079 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1080 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1081 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1082 OP_CRITICAL(= *lhs OP, LCK_ID) \ 1083 } 1084 1085 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1086 1087 // ------------------------------------------------------------------------ 1088 // X86 or X86_64: no alignment problems =================================== 1089 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1090 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1091 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1092 OP_CMPXCHG(TYPE, BITS, OP) \ 1093 } 1094 1095 #else 1096 // ------------------------------------------------------------------------ 1097 // Code for other architectures that don't handle unaligned accesses. 1098 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1099 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1100 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1101 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1102 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1103 } else { \ 1104 KMP_CHECK_GTID; \ 1105 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ 1106 } \ 1107 } 1108 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1109 1110 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, 1111 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl 1112 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, 1113 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl 1114 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, 1115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl 1116 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, 1117 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl 1118 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 1119 0) // __kmpc_atomic_fixed4_andl 1120 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 1121 0) // __kmpc_atomic_fixed4_orl 1122 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, 1123 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl 1124 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, 1125 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl 1126 1127 /* ------------------------------------------------------------------------- */ 1128 /* Routines for Fortran operators that matched no one in C: */ 1129 /* MAX, MIN, .EQV., .NEQV. */ 1130 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 1131 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 1132 1133 // ------------------------------------------------------------------------- 1134 // MIN and MAX need separate macros 1135 // OP - operator to check if we need any actions? 1136 #define MIN_MAX_CRITSECT(OP, LCK_ID) \ 1137 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1138 \ 1139 if (*lhs OP rhs) { /* still need actions? */ \ 1140 *lhs = rhs; \ 1141 } \ 1142 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1143 1144 // ------------------------------------------------------------------------- 1145 #ifdef KMP_GOMP_COMPAT 1146 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ 1147 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1148 KMP_CHECK_GTID; \ 1149 MIN_MAX_CRITSECT(OP, 0); \ 1150 return; \ 1151 } 1152 #else 1153 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) 1154 #endif /* KMP_GOMP_COMPAT */ 1155 1156 // ------------------------------------------------------------------------- 1157 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1158 { \ 1159 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1160 TYPE old_value; \ 1161 temp_val = *lhs; \ 1162 old_value = temp_val; \ 1163 while (old_value OP rhs && /* still need actions? */ \ 1164 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1165 (kmp_int##BITS *)lhs, \ 1166 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1167 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 1168 temp_val = *lhs; \ 1169 old_value = temp_val; \ 1170 } \ 1171 } 1172 1173 // ------------------------------------------------------------------------- 1174 // 1-byte, 2-byte operands - use critical section 1175 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1176 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1177 if (*lhs OP rhs) { /* need actions? */ \ 1178 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1179 MIN_MAX_CRITSECT(OP, LCK_ID) \ 1180 } \ 1181 } 1182 1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1184 1185 // ------------------------------------------------------------------------- 1186 // X86 or X86_64: no alignment problems ==================================== 1187 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1188 GOMP_FLAG) \ 1189 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1190 if (*lhs OP rhs) { \ 1191 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1192 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1193 } \ 1194 } 1195 1196 #else 1197 // ------------------------------------------------------------------------- 1198 // Code for other architectures that don't handle unaligned accesses. 1199 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1200 GOMP_FLAG) \ 1201 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1202 if (*lhs OP rhs) { \ 1203 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1204 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1205 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1206 } else { \ 1207 KMP_CHECK_GTID; \ 1208 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ 1209 } \ 1210 } \ 1211 } 1212 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1213 1214 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, 1215 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max 1216 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, 1217 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min 1218 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, 1219 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max 1220 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, 1221 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min 1222 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 1223 0) // __kmpc_atomic_fixed4_max 1224 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 1225 0) // __kmpc_atomic_fixed4_min 1226 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, 1227 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max 1228 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, 1229 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min 1230 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, 1231 KMP_ARCH_X86) // __kmpc_atomic_float4_max 1232 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, 1233 KMP_ARCH_X86) // __kmpc_atomic_float4_min 1234 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, 1235 KMP_ARCH_X86) // __kmpc_atomic_float8_max 1236 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, 1237 KMP_ARCH_X86) // __kmpc_atomic_float8_min 1238 MIN_MAX_CRITICAL(float10, max, long double, <, 10r, 1239 1) // __kmpc_atomic_float10_max 1240 MIN_MAX_CRITICAL(float10, min, long double, >, 10r, 1241 1) // __kmpc_atomic_float10_min 1242 #if KMP_HAVE_QUAD 1243 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1244 1) // __kmpc_atomic_float16_max 1245 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1246 1) // __kmpc_atomic_float16_min 1247 #if (KMP_ARCH_X86) 1248 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1249 1) // __kmpc_atomic_float16_max_a16 1250 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1251 1) // __kmpc_atomic_float16_min_a16 1252 #endif // (KMP_ARCH_X86) 1253 #endif // KMP_HAVE_QUAD 1254 // ------------------------------------------------------------------------ 1255 // Need separate macros for .EQV. because of the need of complement (~) 1256 // OP ignored for critical sections, ^=~ used instead 1257 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1258 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1259 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1260 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \ 1261 } 1262 1263 // ------------------------------------------------------------------------ 1264 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1265 // ------------------------------------------------------------------------ 1266 // X86 or X86_64: no alignment problems =================================== 1267 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1268 GOMP_FLAG) \ 1269 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1270 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1271 OP_CMPXCHG(TYPE, BITS, OP) \ 1272 } 1273 // ------------------------------------------------------------------------ 1274 #else 1275 // ------------------------------------------------------------------------ 1276 // Code for other architectures that don't handle unaligned accesses. 1277 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1278 GOMP_FLAG) \ 1279 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1280 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \ 1281 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1282 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1283 } else { \ 1284 KMP_CHECK_GTID; \ 1285 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \ 1286 } \ 1287 } 1288 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1289 1290 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, 1291 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv 1292 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, 1293 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv 1294 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, 1295 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv 1296 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, 1297 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv 1298 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, 1299 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv 1300 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, 1301 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv 1302 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, 1303 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv 1304 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, 1305 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv 1306 1307 // ------------------------------------------------------------------------ 1308 // Routines for Extended types: long double, _Quad, complex flavours (use 1309 // critical section) 1310 // TYPE_ID, OP_ID, TYPE - detailed above 1311 // OP - operator 1312 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1313 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1314 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1315 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 1316 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ 1317 } 1318 1319 /* ------------------------------------------------------------------------- */ 1320 // routines for long double type 1321 ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1322 1) // __kmpc_atomic_float10_add 1323 ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1324 1) // __kmpc_atomic_float10_sub 1325 ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1326 1) // __kmpc_atomic_float10_mul 1327 ATOMIC_CRITICAL(float10, div, long double, /, 10r, 1328 1) // __kmpc_atomic_float10_div 1329 #if KMP_HAVE_QUAD 1330 // routines for _Quad type 1331 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1332 1) // __kmpc_atomic_float16_add 1333 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1334 1) // __kmpc_atomic_float16_sub 1335 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1336 1) // __kmpc_atomic_float16_mul 1337 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1338 1) // __kmpc_atomic_float16_div 1339 #if (KMP_ARCH_X86) 1340 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1341 1) // __kmpc_atomic_float16_add_a16 1342 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1343 1) // __kmpc_atomic_float16_sub_a16 1344 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1345 1) // __kmpc_atomic_float16_mul_a16 1346 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1347 1) // __kmpc_atomic_float16_div_a16 1348 #endif // (KMP_ARCH_X86) 1349 #endif // KMP_HAVE_QUAD 1350 // routines for complex types 1351 1352 #if USE_CMPXCHG_FIX 1353 // workaround for C78287 (complex(kind=4) data type) 1354 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1355 1) // __kmpc_atomic_cmplx4_add 1356 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1357 1) // __kmpc_atomic_cmplx4_sub 1358 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1359 1) // __kmpc_atomic_cmplx4_mul 1360 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1361 1) // __kmpc_atomic_cmplx4_div 1362 // end of the workaround for C78287 1363 #else 1364 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add 1365 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub 1366 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul 1367 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div 1368 #endif // USE_CMPXCHG_FIX 1369 1370 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add 1371 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub 1372 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul 1373 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div 1374 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1375 1) // __kmpc_atomic_cmplx10_add 1376 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1377 1) // __kmpc_atomic_cmplx10_sub 1378 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1379 1) // __kmpc_atomic_cmplx10_mul 1380 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1381 1) // __kmpc_atomic_cmplx10_div 1382 #if KMP_HAVE_QUAD 1383 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1384 1) // __kmpc_atomic_cmplx16_add 1385 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1386 1) // __kmpc_atomic_cmplx16_sub 1387 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1388 1) // __kmpc_atomic_cmplx16_mul 1389 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1390 1) // __kmpc_atomic_cmplx16_div 1391 #if (KMP_ARCH_X86) 1392 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1393 1) // __kmpc_atomic_cmplx16_add_a16 1394 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1395 1) // __kmpc_atomic_cmplx16_sub_a16 1396 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1397 1) // __kmpc_atomic_cmplx16_mul_a16 1398 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1399 1) // __kmpc_atomic_cmplx16_div_a16 1400 #endif // (KMP_ARCH_X86) 1401 #endif // KMP_HAVE_QUAD 1402 1403 // OpenMP 4.0: x = expr binop x for non-commutative operations. 1404 // Supported only on IA-32 architecture and Intel(R) 64 1405 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1406 1407 // ------------------------------------------------------------------------ 1408 // Operation on *lhs, rhs bound by critical section 1409 // OP - operator (it's supposed to contain an assignment) 1410 // LCK_ID - lock identifier 1411 // Note: don't check gtid as it should always be valid 1412 // 1, 2-byte - expect valid parameter, other - check before this macro 1413 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1414 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1415 \ 1416 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 1417 \ 1418 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1419 1420 #ifdef KMP_GOMP_COMPAT 1421 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \ 1422 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1423 KMP_CHECK_GTID; \ 1424 OP_CRITICAL_REV(TYPE, OP, 0); \ 1425 return; \ 1426 } 1427 1428 #else 1429 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) 1430 #endif /* KMP_GOMP_COMPAT */ 1431 1432 // Beginning of a definition (provides name, parameters, gebug trace) 1433 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1434 // fixed) 1435 // OP_ID - operation identifier (add, sub, mul, ...) 1436 // TYPE - operands' type 1437 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1438 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ 1439 TYPE *lhs, TYPE rhs) { \ 1440 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1441 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); 1442 1443 // ------------------------------------------------------------------------ 1444 // Operation on *lhs, rhs using "compare_and_store" routine 1445 // TYPE - operands' type 1446 // BITS - size in bits, used to distinguish low level calls 1447 // OP - operator 1448 // Note: temp_val introduced in order to force the compiler to read 1449 // *lhs only once (w/o it the compiler reads *lhs twice) 1450 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1451 { \ 1452 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1453 TYPE old_value, new_value; \ 1454 temp_val = *lhs; \ 1455 old_value = temp_val; \ 1456 new_value = (TYPE)(rhs OP old_value); \ 1457 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1458 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1459 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 1460 KMP_DO_PAUSE; \ 1461 \ 1462 temp_val = *lhs; \ 1463 old_value = temp_val; \ 1464 new_value = (TYPE)(rhs OP old_value); \ 1465 } \ 1466 } 1467 1468 // ------------------------------------------------------------------------- 1469 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ 1470 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1471 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1472 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1473 } 1474 1475 // ------------------------------------------------------------------------ 1476 // Entries definition for integer operands 1477 // TYPE_ID - operands type and size (fixed4, float4) 1478 // OP_ID - operation identifier (add, sub, mul, ...) 1479 // TYPE - operand type 1480 // BITS - size in bits, used to distinguish low level calls 1481 // OP - operator (used in critical section) 1482 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1483 1484 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1485 // ------------------------------------------------------------------------ 1486 // Routines for ATOMIC integer operands, other operators 1487 // ------------------------------------------------------------------------ 1488 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1489 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, 1490 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev 1491 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, 1492 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev 1493 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, 1494 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev 1495 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, 1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev 1497 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, 1498 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev 1499 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, 1500 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev 1501 1502 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, 1503 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev 1504 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, 1505 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev 1506 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, 1507 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev 1508 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, 1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev 1510 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1511 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev 1512 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, 1513 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev 1514 1515 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, 1516 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev 1517 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, 1518 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev 1519 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, 1520 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev 1521 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, 1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev 1523 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, 1524 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev 1525 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, 1526 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev 1527 1528 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, 1529 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev 1530 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, 1531 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev 1532 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, 1533 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev 1534 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, 1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev 1536 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, 1537 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev 1538 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, 1539 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev 1540 1541 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, 1542 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev 1543 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, 1544 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev 1545 1546 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, 1547 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev 1548 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1549 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1550 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1551 1552 // ------------------------------------------------------------------------ 1553 // Routines for Extended types: long double, _Quad, complex flavours (use 1554 // critical section) 1555 // TYPE_ID, OP_ID, TYPE - detailed above 1556 // OP - operator 1557 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1558 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1559 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1560 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1561 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1562 } 1563 1564 /* ------------------------------------------------------------------------- */ 1565 // routines for long double type 1566 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1567 1) // __kmpc_atomic_float10_sub_rev 1568 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1569 1) // __kmpc_atomic_float10_div_rev 1570 #if KMP_HAVE_QUAD 1571 // routines for _Quad type 1572 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1573 1) // __kmpc_atomic_float16_sub_rev 1574 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1575 1) // __kmpc_atomic_float16_div_rev 1576 #if (KMP_ARCH_X86) 1577 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1578 1) // __kmpc_atomic_float16_sub_a16_rev 1579 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1580 1) // __kmpc_atomic_float16_div_a16_rev 1581 #endif // KMP_ARCH_X86 1582 #endif // KMP_HAVE_QUAD 1583 1584 // routines for complex types 1585 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1586 1) // __kmpc_atomic_cmplx4_sub_rev 1587 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1588 1) // __kmpc_atomic_cmplx4_div_rev 1589 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1590 1) // __kmpc_atomic_cmplx8_sub_rev 1591 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1592 1) // __kmpc_atomic_cmplx8_div_rev 1593 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1594 1) // __kmpc_atomic_cmplx10_sub_rev 1595 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1596 1) // __kmpc_atomic_cmplx10_div_rev 1597 #if KMP_HAVE_QUAD 1598 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1599 1) // __kmpc_atomic_cmplx16_sub_rev 1600 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1601 1) // __kmpc_atomic_cmplx16_div_rev 1602 #if (KMP_ARCH_X86) 1603 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1604 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1605 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1606 1) // __kmpc_atomic_cmplx16_div_a16_rev 1607 #endif // KMP_ARCH_X86 1608 #endif // KMP_HAVE_QUAD 1609 1610 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1611 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 1612 1613 /* ------------------------------------------------------------------------ */ 1614 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ 1615 /* Note: in order to reduce the total number of types combinations */ 1616 /* it is supposed that compiler converts RHS to longest floating type,*/ 1617 /* that is _Quad, before call to any of these routines */ 1618 /* Conversion to _Quad will be done by the compiler during calculation, */ 1619 /* conversion back to TYPE - before the assignment, like: */ 1620 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ 1621 /* Performance penalty expected because of SW emulation use */ 1622 /* ------------------------------------------------------------------------ */ 1623 1624 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1625 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 1626 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \ 1627 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1628 KA_TRACE(100, \ 1629 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 1630 gtid)); 1631 1632 // ------------------------------------------------------------------------- 1633 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \ 1634 GOMP_FLAG) \ 1635 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1636 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 1637 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ 1638 } 1639 1640 // ------------------------------------------------------------------------- 1641 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1642 // ------------------------------------------------------------------------- 1643 // X86 or X86_64: no alignment problems ==================================== 1644 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1645 LCK_ID, MASK, GOMP_FLAG) \ 1646 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1647 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1648 OP_CMPXCHG(TYPE, BITS, OP) \ 1649 } 1650 // ------------------------------------------------------------------------- 1651 #else 1652 // ------------------------------------------------------------------------ 1653 // Code for other architectures that don't handle unaligned accesses. 1654 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1655 LCK_ID, MASK, GOMP_FLAG) \ 1656 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1657 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1658 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1659 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1660 } else { \ 1661 KMP_CHECK_GTID; \ 1662 OP_UPDATE_CRITICAL(TYPE, OP, \ 1663 LCK_ID) /* unaligned address - use critical */ \ 1664 } \ 1665 } 1666 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1667 1668 // ------------------------------------------------------------------------- 1669 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1670 // ------------------------------------------------------------------------- 1671 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1672 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1673 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1674 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1675 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1676 } 1677 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1678 LCK_ID, GOMP_FLAG) \ 1679 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1680 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1681 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1682 } 1683 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1684 1685 // RHS=float8 1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1687 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1688 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1689 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1690 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1691 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1692 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1693 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1695 0) // __kmpc_atomic_fixed4_mul_float8 1696 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1697 0) // __kmpc_atomic_fixed4_div_float8 1698 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1699 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1700 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1701 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1702 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1703 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1704 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1705 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1706 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1707 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1708 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1709 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1710 1711 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1712 // use them) 1713 #if KMP_HAVE_QUAD 1714 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1715 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1716 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1717 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1718 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1719 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1720 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 1722 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, 1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1724 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1726 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1727 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1728 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1729 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1730 1731 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1732 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1733 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1734 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1735 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1736 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1737 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1739 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1741 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1742 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1743 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1744 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1745 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1746 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1747 1748 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1749 0) // __kmpc_atomic_fixed4_add_fp 1750 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1751 0) // __kmpc_atomic_fixed4u_add_fp 1752 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1753 0) // __kmpc_atomic_fixed4_sub_fp 1754 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1755 0) // __kmpc_atomic_fixed4u_sub_fp 1756 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1757 0) // __kmpc_atomic_fixed4_mul_fp 1758 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1759 0) // __kmpc_atomic_fixed4u_mul_fp 1760 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1761 0) // __kmpc_atomic_fixed4_div_fp 1762 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1763 0) // __kmpc_atomic_fixed4u_div_fp 1764 1765 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1766 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1767 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1768 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1769 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1770 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1771 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1772 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1773 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1774 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1775 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1776 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1777 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1778 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1779 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1780 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1781 1782 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1783 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1784 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, 1785 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp 1786 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1787 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1788 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1789 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1790 1791 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1792 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1793 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1794 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1795 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1796 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1797 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1798 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1799 1800 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1801 1) // __kmpc_atomic_float10_add_fp 1802 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1803 1) // __kmpc_atomic_float10_sub_fp 1804 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1805 1) // __kmpc_atomic_float10_mul_fp 1806 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1807 1) // __kmpc_atomic_float10_div_fp 1808 1809 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1810 // Reverse operations 1811 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1812 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1813 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1814 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1815 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1816 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1817 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1818 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1819 1820 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1821 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1822 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1823 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1824 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1825 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1826 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1827 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1828 1829 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1830 0) // __kmpc_atomic_fixed4_sub_rev_fp 1831 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1832 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1833 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1834 0) // __kmpc_atomic_fixed4_div_rev_fp 1835 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1836 0) // __kmpc_atomic_fixed4u_div_rev_fp 1837 1838 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1839 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1840 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1841 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1842 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1843 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1844 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1845 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1846 1847 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1848 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp 1849 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, 1850 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp 1851 1852 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, 1853 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp 1854 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, 1855 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp 1856 1857 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1858 1) // __kmpc_atomic_float10_sub_rev_fp 1859 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1860 1) // __kmpc_atomic_float10_div_rev_fp 1861 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1862 1863 #endif // KMP_HAVE_QUAD 1864 1865 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1866 // ------------------------------------------------------------------------ 1867 // X86 or X86_64: no alignment problems ==================================== 1868 #if USE_CMPXCHG_FIX 1869 // workaround for C78287 (complex(kind=4) data type) 1870 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1871 LCK_ID, MASK, GOMP_FLAG) \ 1872 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1873 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1874 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 1875 } 1876 // end of the second part of the workaround for C78287 1877 #else 1878 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1879 LCK_ID, MASK, GOMP_FLAG) \ 1880 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1881 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1882 OP_CMPXCHG(TYPE, BITS, OP) \ 1883 } 1884 #endif // USE_CMPXCHG_FIX 1885 #else 1886 // ------------------------------------------------------------------------ 1887 // Code for other architectures that don't handle unaligned accesses. 1888 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1889 LCK_ID, MASK, GOMP_FLAG) \ 1890 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1891 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1892 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1893 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1894 } else { \ 1895 KMP_CHECK_GTID; \ 1896 OP_UPDATE_CRITICAL(TYPE, OP, \ 1897 LCK_ID) /* unaligned address - use critical */ \ 1898 } \ 1899 } 1900 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1901 1902 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 1903 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 1904 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 1905 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 1906 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 1907 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 1908 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 1909 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 1910 1911 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 1912 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1913 1914 // ------------------------------------------------------------------------ 1915 // Atomic READ routines 1916 1917 // ------------------------------------------------------------------------ 1918 // Beginning of a definition (provides name, parameters, gebug trace) 1919 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1920 // fixed) 1921 // OP_ID - operation identifier (add, sub, mul, ...) 1922 // TYPE - operands' type 1923 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1924 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 1925 TYPE *loc) { \ 1926 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1927 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1928 1929 // ------------------------------------------------------------------------ 1930 // Operation on *lhs, rhs using "compare_and_store_ret" routine 1931 // TYPE - operands' type 1932 // BITS - size in bits, used to distinguish low level calls 1933 // OP - operator 1934 // Note: temp_val introduced in order to force the compiler to read 1935 // *lhs only once (w/o it the compiler reads *lhs twice) 1936 // TODO: check if it is still necessary 1937 // Return old value regardless of the result of "compare & swap# operation 1938 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1939 { \ 1940 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1941 union f_i_union { \ 1942 TYPE f_val; \ 1943 kmp_int##BITS i_val; \ 1944 }; \ 1945 union f_i_union old_value; \ 1946 temp_val = *loc; \ 1947 old_value.f_val = temp_val; \ 1948 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \ 1949 (kmp_int##BITS *)loc, \ 1950 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \ 1951 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \ 1952 new_value = old_value.f_val; \ 1953 return new_value; \ 1954 } 1955 1956 // ------------------------------------------------------------------------- 1957 // Operation on *lhs, rhs bound by critical section 1958 // OP - operator (it's supposed to contain an assignment) 1959 // LCK_ID - lock identifier 1960 // Note: don't check gtid as it should always be valid 1961 // 1, 2-byte - expect valid parameter, other - check before this macro 1962 #define OP_CRITICAL_READ(OP, LCK_ID) \ 1963 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1964 \ 1965 new_value = (*loc); \ 1966 \ 1967 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1968 1969 // ------------------------------------------------------------------------- 1970 #ifdef KMP_GOMP_COMPAT 1971 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \ 1972 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1973 KMP_CHECK_GTID; \ 1974 OP_CRITICAL_READ(OP, 0); \ 1975 return new_value; \ 1976 } 1977 #else 1978 #define OP_GOMP_CRITICAL_READ(OP, FLAG) 1979 #endif /* KMP_GOMP_COMPAT */ 1980 1981 // ------------------------------------------------------------------------- 1982 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1983 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1984 TYPE new_value; \ 1985 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1986 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \ 1987 return new_value; \ 1988 } 1989 // ------------------------------------------------------------------------- 1990 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1991 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1992 TYPE new_value; \ 1993 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1994 OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1995 } 1996 // ------------------------------------------------------------------------ 1997 // Routines for Extended types: long double, _Quad, complex flavours (use 1998 // critical section) 1999 // TYPE_ID, OP_ID, TYPE - detailed above 2000 // OP - operator 2001 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2002 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2003 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 2004 TYPE new_value; \ 2005 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 2006 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 2007 return new_value; \ 2008 } 2009 2010 // ------------------------------------------------------------------------ 2011 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 2012 // value doesn't work. 2013 // Let's return the read value through the additional parameter. 2014 #if (KMP_OS_WINDOWS) 2015 2016 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 2017 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2018 \ 2019 (*out) = (*loc); \ 2020 \ 2021 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2022 // ------------------------------------------------------------------------ 2023 #ifdef KMP_GOMP_COMPAT 2024 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 2025 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2026 KMP_CHECK_GTID; \ 2027 OP_CRITICAL_READ_WRK(OP, 0); \ 2028 } 2029 #else 2030 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 2031 #endif /* KMP_GOMP_COMPAT */ 2032 // ------------------------------------------------------------------------ 2033 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 2034 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 2035 TYPE *loc) { \ 2036 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2037 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2038 2039 // ------------------------------------------------------------------------ 2040 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2041 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 2042 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 2043 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 2044 } 2045 2046 #endif // KMP_OS_WINDOWS 2047 2048 // ------------------------------------------------------------------------ 2049 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2050 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 2051 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 2052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 2053 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 2054 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2055 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2056 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2057 2058 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic 2059 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2060 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2061 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2062 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2063 2064 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2065 1) // __kmpc_atomic_float10_rd 2066 #if KMP_HAVE_QUAD 2067 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2068 1) // __kmpc_atomic_float16_rd 2069 #endif // KMP_HAVE_QUAD 2070 2071 // Fix for CQ220361 on Windows* OS 2072 #if (KMP_OS_WINDOWS) 2073 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2074 1) // __kmpc_atomic_cmplx4_rd 2075 #else 2076 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2077 1) // __kmpc_atomic_cmplx4_rd 2078 #endif // (KMP_OS_WINDOWS) 2079 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2080 1) // __kmpc_atomic_cmplx8_rd 2081 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2082 1) // __kmpc_atomic_cmplx10_rd 2083 #if KMP_HAVE_QUAD 2084 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2085 1) // __kmpc_atomic_cmplx16_rd 2086 #if (KMP_ARCH_X86) 2087 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2088 1) // __kmpc_atomic_float16_a16_rd 2089 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2090 1) // __kmpc_atomic_cmplx16_a16_rd 2091 #endif // (KMP_ARCH_X86) 2092 #endif // KMP_HAVE_QUAD 2093 2094 // ------------------------------------------------------------------------ 2095 // Atomic WRITE routines 2096 2097 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2098 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2099 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2100 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2101 } 2102 // ------------------------------------------------------------------------ 2103 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2104 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2105 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2106 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2107 } 2108 2109 // ------------------------------------------------------------------------ 2110 // Operation on *lhs, rhs using "compare_and_store" routine 2111 // TYPE - operands' type 2112 // BITS - size in bits, used to distinguish low level calls 2113 // OP - operator 2114 // Note: temp_val introduced in order to force the compiler to read 2115 // *lhs only once (w/o it the compiler reads *lhs twice) 2116 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2117 { \ 2118 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2119 TYPE old_value, new_value; \ 2120 temp_val = *lhs; \ 2121 old_value = temp_val; \ 2122 new_value = rhs; \ 2123 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2124 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2125 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2126 temp_val = *lhs; \ 2127 old_value = temp_val; \ 2128 new_value = rhs; \ 2129 } \ 2130 } 2131 2132 // ------------------------------------------------------------------------- 2133 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2134 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2135 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2136 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2137 } 2138 2139 // ------------------------------------------------------------------------ 2140 // Routines for Extended types: long double, _Quad, complex flavours (use 2141 // critical section) 2142 // TYPE_ID, OP_ID, TYPE - detailed above 2143 // OP - operator 2144 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2145 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2146 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2147 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ 2148 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ 2149 } 2150 // ------------------------------------------------------------------------- 2151 2152 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, 2153 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr 2154 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, 2155 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr 2156 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, 2157 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr 2158 #if (KMP_ARCH_X86) 2159 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, 2160 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2161 #else 2162 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, 2163 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2164 #endif // (KMP_ARCH_X86) 2165 2166 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, 2167 KMP_ARCH_X86) // __kmpc_atomic_float4_wr 2168 #if (KMP_ARCH_X86) 2169 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, 2170 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2171 #else 2172 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, 2173 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2174 #endif // (KMP_ARCH_X86) 2175 2176 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 2177 1) // __kmpc_atomic_float10_wr 2178 #if KMP_HAVE_QUAD 2179 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 2180 1) // __kmpc_atomic_float16_wr 2181 #endif // KMP_HAVE_QUAD 2182 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr 2183 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 2184 1) // __kmpc_atomic_cmplx8_wr 2185 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 2186 1) // __kmpc_atomic_cmplx10_wr 2187 #if KMP_HAVE_QUAD 2188 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 2189 1) // __kmpc_atomic_cmplx16_wr 2190 #if (KMP_ARCH_X86) 2191 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 2192 1) // __kmpc_atomic_float16_a16_wr 2193 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 2194 1) // __kmpc_atomic_cmplx16_a16_wr 2195 #endif // (KMP_ARCH_X86) 2196 #endif // KMP_HAVE_QUAD 2197 2198 // ------------------------------------------------------------------------ 2199 // Atomic CAPTURE routines 2200 2201 // Beginning of a definition (provides name, parameters, gebug trace) 2202 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2203 // fixed) 2204 // OP_ID - operation identifier (add, sub, mul, ...) 2205 // TYPE - operands' type 2206 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2207 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2208 TYPE *lhs, TYPE rhs, int flag) { \ 2209 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2210 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2211 2212 // ------------------------------------------------------------------------- 2213 // Operation on *lhs, rhs bound by critical section 2214 // OP - operator (it's supposed to contain an assignment) 2215 // LCK_ID - lock identifier 2216 // Note: don't check gtid as it should always be valid 2217 // 1, 2-byte - expect valid parameter, other - check before this macro 2218 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2219 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2220 \ 2221 if (flag) { \ 2222 (*lhs) OP rhs; \ 2223 new_value = (*lhs); \ 2224 } else { \ 2225 new_value = (*lhs); \ 2226 (*lhs) OP rhs; \ 2227 } \ 2228 \ 2229 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2230 return new_value; 2231 2232 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \ 2233 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2234 \ 2235 if (flag) { \ 2236 (*lhs) = (TYPE)((*lhs)OP rhs); \ 2237 new_value = (*lhs); \ 2238 } else { \ 2239 new_value = (*lhs); \ 2240 (*lhs) = (TYPE)((*lhs)OP rhs); \ 2241 } \ 2242 \ 2243 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2244 return new_value; 2245 2246 // ------------------------------------------------------------------------ 2247 #ifdef KMP_GOMP_COMPAT 2248 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \ 2249 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2250 KMP_CHECK_GTID; \ 2251 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \ 2252 } 2253 #else 2254 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) 2255 #endif /* KMP_GOMP_COMPAT */ 2256 2257 // ------------------------------------------------------------------------ 2258 // Operation on *lhs, rhs using "compare_and_store" routine 2259 // TYPE - operands' type 2260 // BITS - size in bits, used to distinguish low level calls 2261 // OP - operator 2262 // Note: temp_val introduced in order to force the compiler to read 2263 // *lhs only once (w/o it the compiler reads *lhs twice) 2264 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2265 { \ 2266 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2267 TYPE old_value, new_value; \ 2268 temp_val = *lhs; \ 2269 old_value = temp_val; \ 2270 new_value = (TYPE)(old_value OP rhs); \ 2271 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2272 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2273 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2274 temp_val = *lhs; \ 2275 old_value = temp_val; \ 2276 new_value = (TYPE)(old_value OP rhs); \ 2277 } \ 2278 if (flag) { \ 2279 return new_value; \ 2280 } else \ 2281 return old_value; \ 2282 } 2283 2284 // ------------------------------------------------------------------------- 2285 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2286 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2287 TYPE new_value; \ 2288 (void)new_value; \ 2289 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2290 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2291 } 2292 2293 // ------------------------------------------------------------------------- 2294 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2295 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2296 TYPE old_value, new_value; \ 2297 (void)new_value; \ 2298 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2299 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2300 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2301 if (flag) { \ 2302 return old_value OP rhs; \ 2303 } else \ 2304 return old_value; \ 2305 } 2306 // ------------------------------------------------------------------------- 2307 2308 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2309 0) // __kmpc_atomic_fixed4_add_cpt 2310 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2311 0) // __kmpc_atomic_fixed4_sub_cpt 2312 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2313 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2314 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2315 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2316 2317 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2318 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2319 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2320 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2321 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2322 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2323 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2324 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2325 2326 // ------------------------------------------------------------------------ 2327 // Entries definition for integer operands 2328 // TYPE_ID - operands type and size (fixed4, float4) 2329 // OP_ID - operation identifier (add, sub, mul, ...) 2330 // TYPE - operand type 2331 // BITS - size in bits, used to distinguish low level calls 2332 // OP - operator (used in critical section) 2333 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2334 // ------------------------------------------------------------------------ 2335 // Routines for ATOMIC integer operands, other operators 2336 // ------------------------------------------------------------------------ 2337 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2338 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2339 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2340 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2341 0) // __kmpc_atomic_fixed1_andb_cpt 2342 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2343 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2344 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2345 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2346 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2347 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2348 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2349 0) // __kmpc_atomic_fixed1_orb_cpt 2350 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2351 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2352 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2353 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2354 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2355 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2356 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2357 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2358 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2359 0) // __kmpc_atomic_fixed1_xor_cpt 2360 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2361 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2362 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2363 0) // __kmpc_atomic_fixed2_andb_cpt 2364 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2365 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2366 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2367 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2368 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2369 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt 2370 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2371 0) // __kmpc_atomic_fixed2_orb_cpt 2372 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2373 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2374 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2375 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2376 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2377 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 2378 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, 2379 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt 2380 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 2381 0) // __kmpc_atomic_fixed2_xor_cpt 2382 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 2383 0) // __kmpc_atomic_fixed4_andb_cpt 2384 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, 2385 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt 2386 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, 2387 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt 2388 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 2389 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt 2390 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 2391 0) // __kmpc_atomic_fixed4_orb_cpt 2392 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, 2393 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt 2394 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, 2395 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt 2396 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, 2397 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt 2398 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 2399 0) // __kmpc_atomic_fixed4_xor_cpt 2400 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, 2401 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt 2402 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, 2403 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt 2404 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, 2405 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt 2406 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, 2407 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt 2408 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, 2409 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt 2410 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, 2411 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt 2412 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt 2414 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, 2415 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt 2416 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, 2417 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt 2418 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, 2419 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt 2420 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, 2421 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt 2422 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, 2423 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt 2424 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, 2425 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt 2426 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2427 2428 // CAPTURE routines for mixed types RHS=float16 2429 #if KMP_HAVE_QUAD 2430 2431 // Beginning of a definition (provides name, parameters, gebug trace) 2432 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2433 // fixed) 2434 // OP_ID - operation identifier (add, sub, mul, ...) 2435 // TYPE - operands' type 2436 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2437 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2438 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2439 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2440 KA_TRACE(100, \ 2441 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2442 gtid)); 2443 2444 // ------------------------------------------------------------------------- 2445 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2446 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2447 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2448 TYPE new_value; \ 2449 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2450 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2451 } 2452 2453 // ------------------------------------------------------------------------- 2454 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2455 LCK_ID, GOMP_FLAG) \ 2456 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2457 TYPE new_value; \ 2458 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 2459 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ 2460 } 2461 2462 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2463 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2464 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2465 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2466 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2467 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2468 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2469 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2470 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2471 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2472 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2473 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2474 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2475 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2476 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2477 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2478 2479 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2480 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2481 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2482 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2483 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2484 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2485 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2486 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2487 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2488 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2489 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2490 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2491 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2492 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2493 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2494 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2495 2496 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2497 0) // __kmpc_atomic_fixed4_add_cpt_fp 2498 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2499 0) // __kmpc_atomic_fixed4u_add_cpt_fp 2500 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2501 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2502 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2503 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2504 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2505 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2506 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2507 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2508 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2509 0) // __kmpc_atomic_fixed4_div_cpt_fp 2510 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2511 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2512 2513 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2514 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2515 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2516 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2517 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2518 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2519 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2520 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2521 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2522 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2523 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2524 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2525 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2526 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2527 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2528 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2529 2530 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2531 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2532 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2533 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2534 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2535 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2536 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2537 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2538 2539 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2540 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2541 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2542 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2543 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2544 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2545 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2546 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2547 2548 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2549 1) // __kmpc_atomic_float10_add_cpt_fp 2550 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2551 1) // __kmpc_atomic_float10_sub_cpt_fp 2552 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2553 1) // __kmpc_atomic_float10_mul_cpt_fp 2554 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2555 1) // __kmpc_atomic_float10_div_cpt_fp 2556 2557 #endif // KMP_HAVE_QUAD 2558 2559 // ------------------------------------------------------------------------ 2560 // Routines for C/C++ Reduction operators && and || 2561 2562 // ------------------------------------------------------------------------- 2563 // Operation on *lhs, rhs bound by critical section 2564 // OP - operator (it's supposed to contain an assignment) 2565 // LCK_ID - lock identifier 2566 // Note: don't check gtid as it should always be valid 2567 // 1, 2-byte - expect valid parameter, other - check before this macro 2568 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2569 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2570 \ 2571 if (flag) { \ 2572 new_value OP rhs; \ 2573 (*lhs) = new_value; \ 2574 } else { \ 2575 new_value = (*lhs); \ 2576 (*lhs) OP rhs; \ 2577 } \ 2578 \ 2579 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2580 2581 // ------------------------------------------------------------------------ 2582 #ifdef KMP_GOMP_COMPAT 2583 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2584 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2585 KMP_CHECK_GTID; \ 2586 OP_CRITICAL_L_CPT(OP, 0); \ 2587 return new_value; \ 2588 } 2589 #else 2590 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2591 #endif /* KMP_GOMP_COMPAT */ 2592 2593 // ------------------------------------------------------------------------ 2594 // Need separate macros for &&, || because there is no combined assignment 2595 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2596 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2597 TYPE new_value; \ 2598 (void)new_value; \ 2599 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2600 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2601 } 2602 2603 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2604 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2605 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2606 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2607 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2608 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2609 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2610 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2611 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2612 0) // __kmpc_atomic_fixed4_andl_cpt 2613 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2614 0) // __kmpc_atomic_fixed4_orl_cpt 2615 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2616 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2617 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2618 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2619 2620 // ------------------------------------------------------------------------- 2621 // Routines for Fortran operators that matched no one in C: 2622 // MAX, MIN, .EQV., .NEQV. 2623 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2624 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2625 2626 // ------------------------------------------------------------------------- 2627 // MIN and MAX need separate macros 2628 // OP - operator to check if we need any actions? 2629 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2630 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2631 \ 2632 if (*lhs OP rhs) { /* still need actions? */ \ 2633 old_value = *lhs; \ 2634 *lhs = rhs; \ 2635 if (flag) \ 2636 new_value = rhs; \ 2637 else \ 2638 new_value = old_value; \ 2639 } else { \ 2640 new_value = *lhs; \ 2641 } \ 2642 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2643 return new_value; 2644 2645 // ------------------------------------------------------------------------- 2646 #ifdef KMP_GOMP_COMPAT 2647 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2648 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2649 KMP_CHECK_GTID; \ 2650 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2651 } 2652 #else 2653 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2654 #endif /* KMP_GOMP_COMPAT */ 2655 2656 // ------------------------------------------------------------------------- 2657 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2658 { \ 2659 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2660 /*TYPE old_value; */ \ 2661 temp_val = *lhs; \ 2662 old_value = temp_val; \ 2663 while (old_value OP rhs && /* still need actions? */ \ 2664 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2665 (kmp_int##BITS *)lhs, \ 2666 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2667 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2668 temp_val = *lhs; \ 2669 old_value = temp_val; \ 2670 } \ 2671 if (flag) \ 2672 return rhs; \ 2673 else \ 2674 return old_value; \ 2675 } 2676 2677 // ------------------------------------------------------------------------- 2678 // 1-byte, 2-byte operands - use critical section 2679 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2680 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2681 TYPE new_value, old_value; \ 2682 if (*lhs OP rhs) { /* need actions? */ \ 2683 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2684 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2685 } \ 2686 return *lhs; \ 2687 } 2688 2689 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2690 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2691 TYPE new_value, old_value; \ 2692 (void)new_value; \ 2693 if (*lhs OP rhs) { \ 2694 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2695 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2696 } \ 2697 return *lhs; \ 2698 } 2699 2700 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2701 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2702 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2703 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2704 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2705 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2706 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2707 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2708 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2709 0) // __kmpc_atomic_fixed4_max_cpt 2710 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2711 0) // __kmpc_atomic_fixed4_min_cpt 2712 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2713 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2714 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2715 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2716 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2717 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2718 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2719 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2720 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2721 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2722 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2723 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2724 MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r, 2725 1) // __kmpc_atomic_float10_max_cpt 2726 MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r, 2727 1) // __kmpc_atomic_float10_min_cpt 2728 #if KMP_HAVE_QUAD 2729 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2730 1) // __kmpc_atomic_float16_max_cpt 2731 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r, 2732 1) // __kmpc_atomic_float16_min_cpt 2733 #if (KMP_ARCH_X86) 2734 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, 2735 1) // __kmpc_atomic_float16_max_a16_cpt 2736 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, 2737 1) // __kmpc_atomic_float16_mix_a16_cpt 2738 #endif // (KMP_ARCH_X86) 2739 #endif // KMP_HAVE_QUAD 2740 2741 // ------------------------------------------------------------------------ 2742 #ifdef KMP_GOMP_COMPAT 2743 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \ 2744 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2745 KMP_CHECK_GTID; \ 2746 OP_CRITICAL_CPT(OP, 0); \ 2747 } 2748 #else 2749 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) 2750 #endif /* KMP_GOMP_COMPAT */ 2751 // ------------------------------------------------------------------------ 2752 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2753 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2754 TYPE new_value; \ 2755 (void)new_value; \ 2756 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 2757 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2758 } 2759 2760 // ------------------------------------------------------------------------ 2761 2762 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, 2763 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt 2764 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, 2765 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt 2766 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, 2767 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt 2768 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, 2769 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt 2770 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, 2771 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt 2772 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, 2773 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt 2774 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, 2775 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt 2776 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, 2777 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt 2778 2779 // ------------------------------------------------------------------------ 2780 // Routines for Extended types: long double, _Quad, complex flavours (use 2781 // critical section) 2782 // TYPE_ID, OP_ID, TYPE - detailed above 2783 // OP - operator 2784 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2785 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2786 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2787 TYPE new_value; \ 2788 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 2789 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ 2790 } 2791 2792 // ------------------------------------------------------------------------ 2793 // Workaround for cmplx4. Regular routines with return value don't work 2794 // on Win_32e. Let's return captured values through the additional parameter. 2795 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2796 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2797 \ 2798 if (flag) { \ 2799 (*lhs) OP rhs; \ 2800 (*out) = (*lhs); \ 2801 } else { \ 2802 (*out) = (*lhs); \ 2803 (*lhs) OP rhs; \ 2804 } \ 2805 \ 2806 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2807 return; 2808 // ------------------------------------------------------------------------ 2809 2810 #ifdef KMP_GOMP_COMPAT 2811 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2812 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2813 KMP_CHECK_GTID; \ 2814 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2815 } 2816 #else 2817 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2818 #endif /* KMP_GOMP_COMPAT */ 2819 // ------------------------------------------------------------------------ 2820 2821 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2822 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2823 TYPE rhs, TYPE *out, int flag) { \ 2824 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2825 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2826 // ------------------------------------------------------------------------ 2827 2828 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2829 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2830 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2831 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2832 } 2833 // The end of workaround for cmplx4 2834 2835 /* ------------------------------------------------------------------------- */ 2836 // routines for long double type 2837 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2838 1) // __kmpc_atomic_float10_add_cpt 2839 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2840 1) // __kmpc_atomic_float10_sub_cpt 2841 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2842 1) // __kmpc_atomic_float10_mul_cpt 2843 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2844 1) // __kmpc_atomic_float10_div_cpt 2845 #if KMP_HAVE_QUAD 2846 // routines for _Quad type 2847 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2848 1) // __kmpc_atomic_float16_add_cpt 2849 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2850 1) // __kmpc_atomic_float16_sub_cpt 2851 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2852 1) // __kmpc_atomic_float16_mul_cpt 2853 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2854 1) // __kmpc_atomic_float16_div_cpt 2855 #if (KMP_ARCH_X86) 2856 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2857 1) // __kmpc_atomic_float16_add_a16_cpt 2858 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2859 1) // __kmpc_atomic_float16_sub_a16_cpt 2860 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2861 1) // __kmpc_atomic_float16_mul_a16_cpt 2862 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2863 1) // __kmpc_atomic_float16_div_a16_cpt 2864 #endif // (KMP_ARCH_X86) 2865 #endif // KMP_HAVE_QUAD 2866 2867 // routines for complex types 2868 2869 // cmplx4 routines to return void 2870 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2871 1) // __kmpc_atomic_cmplx4_add_cpt 2872 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2873 1) // __kmpc_atomic_cmplx4_sub_cpt 2874 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2875 1) // __kmpc_atomic_cmplx4_mul_cpt 2876 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2877 1) // __kmpc_atomic_cmplx4_div_cpt 2878 2879 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, 2880 1) // __kmpc_atomic_cmplx8_add_cpt 2881 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2882 1) // __kmpc_atomic_cmplx8_sub_cpt 2883 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2884 1) // __kmpc_atomic_cmplx8_mul_cpt 2885 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2886 1) // __kmpc_atomic_cmplx8_div_cpt 2887 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2888 1) // __kmpc_atomic_cmplx10_add_cpt 2889 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2890 1) // __kmpc_atomic_cmplx10_sub_cpt 2891 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2892 1) // __kmpc_atomic_cmplx10_mul_cpt 2893 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2894 1) // __kmpc_atomic_cmplx10_div_cpt 2895 #if KMP_HAVE_QUAD 2896 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2897 1) // __kmpc_atomic_cmplx16_add_cpt 2898 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2899 1) // __kmpc_atomic_cmplx16_sub_cpt 2900 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2901 1) // __kmpc_atomic_cmplx16_mul_cpt 2902 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2903 1) // __kmpc_atomic_cmplx16_div_cpt 2904 #if (KMP_ARCH_X86) 2905 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2906 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2907 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2908 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2909 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2910 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2911 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2912 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2913 #endif // (KMP_ARCH_X86) 2914 #endif // KMP_HAVE_QUAD 2915 2916 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2917 // binop x; v = x; } for non-commutative operations. 2918 // Supported only on IA-32 architecture and Intel(R) 64 2919 2920 // ------------------------------------------------------------------------- 2921 // Operation on *lhs, rhs bound by critical section 2922 // OP - operator (it's supposed to contain an assignment) 2923 // LCK_ID - lock identifier 2924 // Note: don't check gtid as it should always be valid 2925 // 1, 2-byte - expect valid parameter, other - check before this macro 2926 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 2927 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2928 \ 2929 if (flag) { \ 2930 /*temp_val = (*lhs);*/ \ 2931 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2932 new_value = (*lhs); \ 2933 } else { \ 2934 new_value = (*lhs); \ 2935 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2936 } \ 2937 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2938 return new_value; 2939 2940 // ------------------------------------------------------------------------ 2941 #ifdef KMP_GOMP_COMPAT 2942 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \ 2943 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2944 KMP_CHECK_GTID; \ 2945 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \ 2946 } 2947 #else 2948 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) 2949 #endif /* KMP_GOMP_COMPAT */ 2950 2951 // ------------------------------------------------------------------------ 2952 // Operation on *lhs, rhs using "compare_and_store" routine 2953 // TYPE - operands' type 2954 // BITS - size in bits, used to distinguish low level calls 2955 // OP - operator 2956 // Note: temp_val introduced in order to force the compiler to read 2957 // *lhs only once (w/o it the compiler reads *lhs twice) 2958 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2959 { \ 2960 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2961 TYPE old_value, new_value; \ 2962 temp_val = *lhs; \ 2963 old_value = temp_val; \ 2964 new_value = (TYPE)(rhs OP old_value); \ 2965 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2966 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2967 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2968 temp_val = *lhs; \ 2969 old_value = temp_val; \ 2970 new_value = (TYPE)(rhs OP old_value); \ 2971 } \ 2972 if (flag) { \ 2973 return new_value; \ 2974 } else \ 2975 return old_value; \ 2976 } 2977 2978 // ------------------------------------------------------------------------- 2979 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2980 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2981 TYPE new_value; \ 2982 (void)new_value; \ 2983 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 2984 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2985 } 2986 2987 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2989 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2991 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2993 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2994 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2995 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2996 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2997 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2999 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 3000 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 3001 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 3002 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 3003 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 3004 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 3005 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 3006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 3007 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 3008 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 3009 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 3010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 3011 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 3012 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 3013 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 3014 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 3015 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 3016 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 3017 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 3018 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 3019 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 3020 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 3021 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 3022 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 3023 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 3024 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 3025 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 3026 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 3027 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 3028 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 3029 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 3030 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 3031 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 3032 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 3033 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 3034 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 3035 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 3036 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 3037 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 3038 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 3039 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 3040 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 3041 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 3042 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 3043 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 3044 3045 // ------------------------------------------------------------------------ 3046 // Routines for Extended types: long double, _Quad, complex flavours (use 3047 // critical section) 3048 // TYPE_ID, OP_ID, TYPE - detailed above 3049 // OP - operator 3050 // LCK_ID - lock identifier, used to possibly distinguish lock variable 3051 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 3052 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 3053 TYPE new_value; \ 3054 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 3055 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 3056 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 3057 } 3058 3059 /* ------------------------------------------------------------------------- */ 3060 // routines for long double type 3061 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 3062 1) // __kmpc_atomic_float10_sub_cpt_rev 3063 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 3064 1) // __kmpc_atomic_float10_div_cpt_rev 3065 #if KMP_HAVE_QUAD 3066 // routines for _Quad type 3067 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 3068 1) // __kmpc_atomic_float16_sub_cpt_rev 3069 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 3070 1) // __kmpc_atomic_float16_div_cpt_rev 3071 #if (KMP_ARCH_X86) 3072 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3073 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3074 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3075 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3076 #endif // (KMP_ARCH_X86) 3077 #endif // KMP_HAVE_QUAD 3078 3079 // routines for complex types 3080 3081 // ------------------------------------------------------------------------ 3082 // Workaround for cmplx4. Regular routines with return value don't work 3083 // on Win_32e. Let's return captured values through the additional parameter. 3084 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3085 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3086 \ 3087 if (flag) { \ 3088 (*lhs) = (rhs)OP(*lhs); \ 3089 (*out) = (*lhs); \ 3090 } else { \ 3091 (*out) = (*lhs); \ 3092 (*lhs) = (rhs)OP(*lhs); \ 3093 } \ 3094 \ 3095 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3096 return; 3097 // ------------------------------------------------------------------------ 3098 3099 #ifdef KMP_GOMP_COMPAT 3100 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3101 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3102 KMP_CHECK_GTID; \ 3103 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3104 } 3105 #else 3106 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3107 #endif /* KMP_GOMP_COMPAT */ 3108 // ------------------------------------------------------------------------ 3109 3110 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3111 GOMP_FLAG) \ 3112 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3113 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3114 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3115 } 3116 // The end of workaround for cmplx4 3117 3118 // !!! TODO: check if we need to return void for cmplx4 routines 3119 // cmplx4 routines to return void 3120 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 3121 1) // __kmpc_atomic_cmplx4_sub_cpt_rev 3122 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 3123 1) // __kmpc_atomic_cmplx4_div_cpt_rev 3124 3125 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 3126 1) // __kmpc_atomic_cmplx8_sub_cpt_rev 3127 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 3128 1) // __kmpc_atomic_cmplx8_div_cpt_rev 3129 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 3130 1) // __kmpc_atomic_cmplx10_sub_cpt_rev 3131 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 3132 1) // __kmpc_atomic_cmplx10_div_cpt_rev 3133 #if KMP_HAVE_QUAD 3134 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 3135 1) // __kmpc_atomic_cmplx16_sub_cpt_rev 3136 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 3137 1) // __kmpc_atomic_cmplx16_div_cpt_rev 3138 #if (KMP_ARCH_X86) 3139 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 3140 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev 3141 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 3142 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev 3143 #endif // (KMP_ARCH_X86) 3144 #endif // KMP_HAVE_QUAD 3145 3146 // Capture reverse for mixed type: RHS=float16 3147 #if KMP_HAVE_QUAD 3148 3149 // Beginning of a definition (provides name, parameters, gebug trace) 3150 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 3151 // fixed) 3152 // OP_ID - operation identifier (add, sub, mul, ...) 3153 // TYPE - operands' type 3154 // ------------------------------------------------------------------------- 3155 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 3156 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 3157 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3158 TYPE new_value; \ 3159 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 3160 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 3161 } 3162 3163 // ------------------------------------------------------------------------- 3164 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 3165 LCK_ID, GOMP_FLAG) \ 3166 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3167 TYPE new_value; \ 3168 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 3169 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \ 3170 } 3171 3172 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3173 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp 3174 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3175 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp 3176 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3177 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp 3178 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3179 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp 3180 3181 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, 3182 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp 3183 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 3184 1, 3185 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp 3186 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, 3187 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp 3188 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3189 1, 3190 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3191 3192 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3193 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3194 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3195 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3196 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3197 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3198 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3199 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3200 3201 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3202 7, 3203 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3204 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3205 8i, 7, 3206 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3207 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3208 7, 3209 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3210 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3211 8i, 7, 3212 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3213 3214 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3215 4r, 3, 3216 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3217 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3218 4r, 3, 3219 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3220 3221 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3222 8r, 7, 3223 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3224 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3225 8r, 7, 3226 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3227 3228 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3229 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3230 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3231 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3232 3233 #endif // KMP_HAVE_QUAD 3234 3235 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3236 3237 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3238 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3239 TYPE rhs) { \ 3240 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3241 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3242 3243 #define CRITICAL_SWP(LCK_ID) \ 3244 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3245 \ 3246 old_value = (*lhs); \ 3247 (*lhs) = rhs; \ 3248 \ 3249 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3250 return old_value; 3251 3252 // ------------------------------------------------------------------------ 3253 #ifdef KMP_GOMP_COMPAT 3254 #define GOMP_CRITICAL_SWP(FLAG) \ 3255 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3256 KMP_CHECK_GTID; \ 3257 CRITICAL_SWP(0); \ 3258 } 3259 #else 3260 #define GOMP_CRITICAL_SWP(FLAG) 3261 #endif /* KMP_GOMP_COMPAT */ 3262 3263 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3264 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3265 TYPE old_value; \ 3266 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3267 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3268 return old_value; \ 3269 } 3270 // ------------------------------------------------------------------------ 3271 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3272 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3273 TYPE old_value; \ 3274 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3275 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3276 return old_value; \ 3277 } 3278 3279 // ------------------------------------------------------------------------ 3280 #define CMPXCHG_SWP(TYPE, BITS) \ 3281 { \ 3282 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3283 TYPE old_value, new_value; \ 3284 temp_val = *lhs; \ 3285 old_value = temp_val; \ 3286 new_value = rhs; \ 3287 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3288 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3289 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3290 temp_val = *lhs; \ 3291 old_value = temp_val; \ 3292 new_value = rhs; \ 3293 } \ 3294 return old_value; \ 3295 } 3296 3297 // ------------------------------------------------------------------------- 3298 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3299 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3300 TYPE old_value; \ 3301 (void)old_value; \ 3302 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3303 CMPXCHG_SWP(TYPE, BITS) \ 3304 } 3305 3306 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3307 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3308 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3309 3310 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3311 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3312 3313 #if (KMP_ARCH_X86) 3314 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3315 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3316 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3317 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3318 #else 3319 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3320 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3321 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3322 #endif // (KMP_ARCH_X86) 3323 3324 // ------------------------------------------------------------------------ 3325 // Routines for Extended types: long double, _Quad, complex flavours (use 3326 // critical section) 3327 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3328 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3329 TYPE old_value; \ 3330 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3331 CRITICAL_SWP(LCK_ID) \ 3332 } 3333 3334 // ------------------------------------------------------------------------ 3335 // !!! TODO: check if we need to return void for cmplx4 routines 3336 // Workaround for cmplx4. Regular routines with return value don't work 3337 // on Win_32e. Let's return captured values through the additional parameter. 3338 3339 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3340 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3341 TYPE rhs, TYPE *out) { \ 3342 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3343 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3344 3345 #define CRITICAL_SWP_WRK(LCK_ID) \ 3346 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3347 \ 3348 tmp = (*lhs); \ 3349 (*lhs) = (rhs); \ 3350 (*out) = tmp; \ 3351 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3352 return; 3353 // ------------------------------------------------------------------------ 3354 3355 #ifdef KMP_GOMP_COMPAT 3356 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3357 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3358 KMP_CHECK_GTID; \ 3359 CRITICAL_SWP_WRK(0); \ 3360 } 3361 #else 3362 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3363 #endif /* KMP_GOMP_COMPAT */ 3364 // ------------------------------------------------------------------------ 3365 3366 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3367 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3368 TYPE tmp; \ 3369 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3370 CRITICAL_SWP_WRK(LCK_ID) \ 3371 } 3372 // The end of workaround for cmplx4 3373 3374 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3375 #if KMP_HAVE_QUAD 3376 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3377 #endif // KMP_HAVE_QUAD 3378 // cmplx4 routine to return void 3379 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3380 3381 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3382 // __kmpc_atomic_cmplx4_swp 3383 3384 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3385 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3386 #if KMP_HAVE_QUAD 3387 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3388 #if (KMP_ARCH_X86) 3389 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3390 1) // __kmpc_atomic_float16_a16_swp 3391 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3392 1) // __kmpc_atomic_cmplx16_a16_swp 3393 #endif // (KMP_ARCH_X86) 3394 #endif // KMP_HAVE_QUAD 3395 3396 // End of OpenMP 4.0 Capture 3397 3398 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3399 3400 #undef OP_CRITICAL 3401 3402 /* ------------------------------------------------------------------------ */ 3403 /* Generic atomic routines */ 3404 3405 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3406 void (*f)(void *, void *, void *)) { 3407 KMP_DEBUG_ASSERT(__kmp_init_serial); 3408 3409 if ( 3410 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3411 FALSE /* must use lock */ 3412 #else 3413 TRUE 3414 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3415 ) { 3416 kmp_int8 old_value, new_value; 3417 3418 old_value = *(kmp_int8 *)lhs; 3419 (*f)(&new_value, &old_value, rhs); 3420 3421 /* TODO: Should this be acquire or release? */ 3422 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3423 *(kmp_int8 *)&new_value)) { 3424 KMP_CPU_PAUSE(); 3425 3426 old_value = *(kmp_int8 *)lhs; 3427 (*f)(&new_value, &old_value, rhs); 3428 } 3429 3430 return; 3431 } else { 3432 // All 1-byte data is of integer data type. 3433 3434 #ifdef KMP_GOMP_COMPAT 3435 if (__kmp_atomic_mode == 2) { 3436 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3437 } else 3438 #endif /* KMP_GOMP_COMPAT */ 3439 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3440 3441 (*f)(lhs, lhs, rhs); 3442 3443 #ifdef KMP_GOMP_COMPAT 3444 if (__kmp_atomic_mode == 2) { 3445 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3446 } else 3447 #endif /* KMP_GOMP_COMPAT */ 3448 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3449 } 3450 } 3451 3452 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3453 void (*f)(void *, void *, void *)) { 3454 if ( 3455 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3456 FALSE /* must use lock */ 3457 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3458 TRUE /* no alignment problems */ 3459 #else 3460 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3461 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3462 ) { 3463 kmp_int16 old_value, new_value; 3464 3465 old_value = *(kmp_int16 *)lhs; 3466 (*f)(&new_value, &old_value, rhs); 3467 3468 /* TODO: Should this be acquire or release? */ 3469 while (!KMP_COMPARE_AND_STORE_ACQ16( 3470 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3471 KMP_CPU_PAUSE(); 3472 3473 old_value = *(kmp_int16 *)lhs; 3474 (*f)(&new_value, &old_value, rhs); 3475 } 3476 3477 return; 3478 } else { 3479 // All 2-byte data is of integer data type. 3480 3481 #ifdef KMP_GOMP_COMPAT 3482 if (__kmp_atomic_mode == 2) { 3483 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3484 } else 3485 #endif /* KMP_GOMP_COMPAT */ 3486 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3487 3488 (*f)(lhs, lhs, rhs); 3489 3490 #ifdef KMP_GOMP_COMPAT 3491 if (__kmp_atomic_mode == 2) { 3492 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3493 } else 3494 #endif /* KMP_GOMP_COMPAT */ 3495 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3496 } 3497 } 3498 3499 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3500 void (*f)(void *, void *, void *)) { 3501 KMP_DEBUG_ASSERT(__kmp_init_serial); 3502 3503 if ( 3504 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3505 // Gomp compatibility is broken if this routine is called for floats. 3506 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3507 TRUE /* no alignment problems */ 3508 #else 3509 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3510 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3511 ) { 3512 kmp_int32 old_value, new_value; 3513 3514 old_value = *(kmp_int32 *)lhs; 3515 (*f)(&new_value, &old_value, rhs); 3516 3517 /* TODO: Should this be acquire or release? */ 3518 while (!KMP_COMPARE_AND_STORE_ACQ32( 3519 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3520 KMP_CPU_PAUSE(); 3521 3522 old_value = *(kmp_int32 *)lhs; 3523 (*f)(&new_value, &old_value, rhs); 3524 } 3525 3526 return; 3527 } else { 3528 // Use __kmp_atomic_lock_4i for all 4-byte data, 3529 // even if it isn't of integer data type. 3530 3531 #ifdef KMP_GOMP_COMPAT 3532 if (__kmp_atomic_mode == 2) { 3533 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3534 } else 3535 #endif /* KMP_GOMP_COMPAT */ 3536 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3537 3538 (*f)(lhs, lhs, rhs); 3539 3540 #ifdef KMP_GOMP_COMPAT 3541 if (__kmp_atomic_mode == 2) { 3542 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3543 } else 3544 #endif /* KMP_GOMP_COMPAT */ 3545 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3546 } 3547 } 3548 3549 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3550 void (*f)(void *, void *, void *)) { 3551 KMP_DEBUG_ASSERT(__kmp_init_serial); 3552 if ( 3553 3554 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3555 FALSE /* must use lock */ 3556 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3557 TRUE /* no alignment problems */ 3558 #else 3559 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3560 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3561 ) { 3562 kmp_int64 old_value, new_value; 3563 3564 old_value = *(kmp_int64 *)lhs; 3565 (*f)(&new_value, &old_value, rhs); 3566 /* TODO: Should this be acquire or release? */ 3567 while (!KMP_COMPARE_AND_STORE_ACQ64( 3568 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3569 KMP_CPU_PAUSE(); 3570 3571 old_value = *(kmp_int64 *)lhs; 3572 (*f)(&new_value, &old_value, rhs); 3573 } 3574 3575 return; 3576 } else { 3577 // Use __kmp_atomic_lock_8i for all 8-byte data, 3578 // even if it isn't of integer data type. 3579 3580 #ifdef KMP_GOMP_COMPAT 3581 if (__kmp_atomic_mode == 2) { 3582 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3583 } else 3584 #endif /* KMP_GOMP_COMPAT */ 3585 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3586 3587 (*f)(lhs, lhs, rhs); 3588 3589 #ifdef KMP_GOMP_COMPAT 3590 if (__kmp_atomic_mode == 2) { 3591 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3592 } else 3593 #endif /* KMP_GOMP_COMPAT */ 3594 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3595 } 3596 } 3597 3598 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3599 void (*f)(void *, void *, void *)) { 3600 KMP_DEBUG_ASSERT(__kmp_init_serial); 3601 3602 #ifdef KMP_GOMP_COMPAT 3603 if (__kmp_atomic_mode == 2) { 3604 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3605 } else 3606 #endif /* KMP_GOMP_COMPAT */ 3607 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3608 3609 (*f)(lhs, lhs, rhs); 3610 3611 #ifdef KMP_GOMP_COMPAT 3612 if (__kmp_atomic_mode == 2) { 3613 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3614 } else 3615 #endif /* KMP_GOMP_COMPAT */ 3616 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3617 } 3618 3619 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3620 void (*f)(void *, void *, void *)) { 3621 KMP_DEBUG_ASSERT(__kmp_init_serial); 3622 3623 #ifdef KMP_GOMP_COMPAT 3624 if (__kmp_atomic_mode == 2) { 3625 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3626 } else 3627 #endif /* KMP_GOMP_COMPAT */ 3628 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3629 3630 (*f)(lhs, lhs, rhs); 3631 3632 #ifdef KMP_GOMP_COMPAT 3633 if (__kmp_atomic_mode == 2) { 3634 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3635 } else 3636 #endif /* KMP_GOMP_COMPAT */ 3637 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3638 } 3639 3640 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3641 void (*f)(void *, void *, void *)) { 3642 KMP_DEBUG_ASSERT(__kmp_init_serial); 3643 3644 #ifdef KMP_GOMP_COMPAT 3645 if (__kmp_atomic_mode == 2) { 3646 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3647 } else 3648 #endif /* KMP_GOMP_COMPAT */ 3649 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3650 3651 (*f)(lhs, lhs, rhs); 3652 3653 #ifdef KMP_GOMP_COMPAT 3654 if (__kmp_atomic_mode == 2) { 3655 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3656 } else 3657 #endif /* KMP_GOMP_COMPAT */ 3658 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3659 } 3660 3661 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3662 void (*f)(void *, void *, void *)) { 3663 KMP_DEBUG_ASSERT(__kmp_init_serial); 3664 3665 #ifdef KMP_GOMP_COMPAT 3666 if (__kmp_atomic_mode == 2) { 3667 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3668 } else 3669 #endif /* KMP_GOMP_COMPAT */ 3670 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3671 3672 (*f)(lhs, lhs, rhs); 3673 3674 #ifdef KMP_GOMP_COMPAT 3675 if (__kmp_atomic_mode == 2) { 3676 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3677 } else 3678 #endif /* KMP_GOMP_COMPAT */ 3679 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3680 } 3681 3682 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3683 // compiler; duplicated in order to not use 3-party names in pure Intel code 3684 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3685 void __kmpc_atomic_start(void) { 3686 int gtid = __kmp_entry_gtid(); 3687 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3688 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3689 } 3690 3691 void __kmpc_atomic_end(void) { 3692 int gtid = __kmp_get_gtid(); 3693 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3694 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3695 } 3696 3697 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3698 3699 // OpenMP 5.1 compare and swap 3700 3701 /*! 3702 @param loc Source code location 3703 @param gtid Global thread id 3704 @param x Memory location to operate on 3705 @param e Expected value 3706 @param d Desired value 3707 @return Result of comparison 3708 3709 Implements Compare And Swap atomic operation. 3710 3711 Sample code: 3712 #pragma omp atomic compare update capture 3713 { r = x == e; if(r) { x = d; } } 3714 */ 3715 bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) { 3716 return KMP_COMPARE_AND_STORE_ACQ8(x, e, d); 3717 } 3718 bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e, 3719 short d) { 3720 return KMP_COMPARE_AND_STORE_ACQ16(x, e, d); 3721 } 3722 bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, 3723 kmp_int32 d) { 3724 return KMP_COMPARE_AND_STORE_ACQ32(x, e, d); 3725 } 3726 bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e, 3727 kmp_int64 d) { 3728 return KMP_COMPARE_AND_STORE_ACQ64(x, e, d); 3729 } 3730 3731 /*! 3732 @param loc Source code location 3733 @param gtid Global thread id 3734 @param x Memory location to operate on 3735 @param e Expected value 3736 @param d Desired value 3737 @return Old value of x 3738 3739 Implements Compare And Swap atomic operation. 3740 3741 Sample code: 3742 #pragma omp atomic compare update capture 3743 { v = x; if (x == e) { x = d; } } 3744 */ 3745 char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) { 3746 return KMP_COMPARE_AND_STORE_RET8(x, e, d); 3747 } 3748 short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e, 3749 short d) { 3750 return KMP_COMPARE_AND_STORE_RET16(x, e, d); 3751 } 3752 kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x, 3753 kmp_int32 e, kmp_int32 d) { 3754 return KMP_COMPARE_AND_STORE_RET32(x, e, d); 3755 } 3756 kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x, 3757 kmp_int64 e, kmp_int64 d) { 3758 return KMP_COMPARE_AND_STORE_RET64(x, e, d); 3759 } 3760 3761 /*! 3762 @param loc Source code location 3763 @param gtid Global thread id 3764 @param x Memory location to operate on 3765 @param e Expected value 3766 @param d Desired value 3767 @param pv Captured value location 3768 @return Result of comparison 3769 3770 Implements Compare And Swap + Capture atomic operation. 3771 3772 v gets old valie of x if comparison failed, untouched otherwise. 3773 Sample code: 3774 #pragma omp atomic compare update capture 3775 { r = x == e; if(r) { x = d; } else { v = x; } } 3776 */ 3777 bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e, 3778 char d, char *pv) { 3779 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d); 3780 if (old == e) 3781 return true; 3782 KMP_ASSERT(pv != NULL); 3783 *pv = old; 3784 return false; 3785 } 3786 bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e, 3787 short d, short *pv) { 3788 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d); 3789 if (old == e) 3790 return true; 3791 KMP_ASSERT(pv != NULL); 3792 *pv = old; 3793 return false; 3794 } 3795 bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x, 3796 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) { 3797 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d); 3798 if (old == e) 3799 return true; 3800 KMP_ASSERT(pv != NULL); 3801 *pv = old; 3802 return false; 3803 } 3804 bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x, 3805 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) { 3806 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d); 3807 if (old == e) 3808 return true; 3809 KMP_ASSERT(pv != NULL); 3810 *pv = old; 3811 return false; 3812 } 3813 3814 /*! 3815 @param loc Source code location 3816 @param gtid Global thread id 3817 @param x Memory location to operate on 3818 @param e Expected value 3819 @param d Desired value 3820 @param pv Captured value location 3821 @return Old value of x 3822 3823 Implements Compare And Swap + Capture atomic operation. 3824 3825 v gets new valie of x. 3826 Sample code: 3827 #pragma omp atomic compare update capture 3828 { if (x == e) { x = d; }; v = x; } 3829 */ 3830 char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e, 3831 char d, char *pv) { 3832 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d); 3833 KMP_ASSERT(pv != NULL); 3834 *pv = old == e ? d : old; 3835 return old; 3836 } 3837 short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e, 3838 short d, short *pv) { 3839 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d); 3840 KMP_ASSERT(pv != NULL); 3841 *pv = old == e ? d : old; 3842 return old; 3843 } 3844 kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x, 3845 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) { 3846 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d); 3847 KMP_ASSERT(pv != NULL); 3848 *pv = old == e ? d : old; 3849 return old; 3850 } 3851 kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x, 3852 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) { 3853 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d); 3854 KMP_ASSERT(pv != NULL); 3855 *pv = old == e ? d : old; 3856 return old; 3857 } 3858 3859 // End OpenMP 5.1 compare + capture 3860 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3861 3862 /*! 3863 @} 3864 */ 3865 3866 // end of file 3867