1// z_Linux_asm.S: - microtasking routines specifically 2// written for Intel platforms running Linux* OS 3 4// 5////===----------------------------------------------------------------------===// 6//// 7//// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8//// See https://llvm.org/LICENSE.txt for license information. 9//// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10//// 11////===----------------------------------------------------------------------===// 12// 13 14// ----------------------------------------------------------------------- 15// macros 16// ----------------------------------------------------------------------- 17 18#include "kmp_config.h" 19 20#if KMP_ARCH_X86 || KMP_ARCH_X86_64 21 22# if defined(__ELF__) && defined(__CET__) && defined(__has_include) 23# if __has_include(<cet.h>) 24# include <cet.h> 25# endif 26# endif 27 28# if !defined(_CET_ENDBR) 29# define _CET_ENDBR 30# endif 31 32# if KMP_MIC 33// the 'delay r16/r32/r64' should be used instead of the 'pause'. 34// The delay operation has the effect of removing the current thread from 35// the round-robin HT mechanism, and therefore speeds up the issue rate of 36// the other threads on the same core. 37// 38// A value of 0 works fine for <= 2 threads per core, but causes the EPCC 39// barrier time to increase greatly for 3 or more threads per core. 40// 41// A value of 100 works pretty well for up to 4 threads per core, but isn't 42// quite as fast as 0 for 2 threads per core. 43// 44// We need to check what happens for oversubscription / > 4 threads per core. 45// It is possible that we need to pass the delay value in as a parameter 46// that the caller determines based on the total # threads / # cores. 47// 48//.macro pause_op 49// mov $100, %rax 50// delay %rax 51//.endm 52# else 53# define pause_op .byte 0xf3,0x90 54# endif // KMP_MIC 55 56# if KMP_OS_DARWIN 57# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 58# define KMP_LABEL(x) L_##x // form the name of label 59.macro KMP_CFI_DEF_OFFSET 60.endmacro 61.macro KMP_CFI_OFFSET 62.endmacro 63.macro KMP_CFI_REGISTER 64.endmacro 65.macro KMP_CFI_DEF 66.endmacro 67.macro ALIGN 68 .align $0 69.endmacro 70.macro DEBUG_INFO 71/* Not sure what .size does in icc, not sure if we need to do something 72 similar for OS X*. 73*/ 74.endmacro 75.macro PROC 76 ALIGN 4 77 .globl KMP_PREFIX_UNDERSCORE($0) 78KMP_PREFIX_UNDERSCORE($0): 79 _CET_ENDBR 80.endmacro 81# else // KMP_OS_DARWIN 82# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols 83// Format labels so that they don't override function names in gdb's backtraces 84// MIC assembler doesn't accept .L syntax, the L works fine there (as well as 85// on OS X*) 86# if KMP_MIC 87# define KMP_LABEL(x) L_##x // local label 88# else 89# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 90# endif // KMP_MIC 91.macro ALIGN size 92 .align 1<<(\size) 93.endm 94.macro DEBUG_INFO proc 95 .cfi_endproc 96// Not sure why we need .type and .size for the functions 97 .align 16 98 .type \proc,@function 99 .size \proc,.-\proc 100.endm 101.macro PROC proc 102 ALIGN 4 103 .globl KMP_PREFIX_UNDERSCORE(\proc) 104KMP_PREFIX_UNDERSCORE(\proc): 105 .cfi_startproc 106 _CET_ENDBR 107.endm 108.macro KMP_CFI_DEF_OFFSET sz 109 .cfi_def_cfa_offset \sz 110.endm 111.macro KMP_CFI_OFFSET reg, sz 112 .cfi_offset \reg,\sz 113.endm 114.macro KMP_CFI_REGISTER reg 115 .cfi_def_cfa_register \reg 116.endm 117.macro KMP_CFI_DEF reg, sz 118 .cfi_def_cfa \reg,\sz 119.endm 120# endif // KMP_OS_DARWIN 121#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 122 123#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) 124 125# if KMP_OS_DARWIN 126# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 127# define KMP_LABEL(x) L_##x // form the name of label 128 129.macro ALIGN 130 .align $0 131.endmacro 132 133.macro DEBUG_INFO 134/* Not sure what .size does in icc, not sure if we need to do something 135 similar for OS X*. 136*/ 137.endmacro 138 139.macro PROC 140 ALIGN 4 141 .globl KMP_PREFIX_UNDERSCORE($0) 142KMP_PREFIX_UNDERSCORE($0): 143.endmacro 144# elif KMP_OS_WINDOWS 145# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols 146// Format labels so that they don't override function names in gdb's backtraces 147# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 148 149.macro ALIGN size 150 .align 1<<(\size) 151.endm 152 153.macro DEBUG_INFO proc 154 ALIGN 2 155.endm 156 157.macro PROC proc 158 ALIGN 2 159 .globl KMP_PREFIX_UNDERSCORE(\proc) 160KMP_PREFIX_UNDERSCORE(\proc): 161.endm 162# else // KMP_OS_DARWIN || KMP_OS_WINDOWS 163# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols 164// Format labels so that they don't override function names in gdb's backtraces 165# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 166 167.macro ALIGN size 168 .align 1<<(\size) 169.endm 170 171.macro DEBUG_INFO proc 172 .cfi_endproc 173// Not sure why we need .type and .size for the functions 174 ALIGN 2 175#if KMP_ARCH_ARM 176 .type \proc,%function 177#else 178 .type \proc,@function 179#endif 180 .size \proc,.-\proc 181.endm 182 183.macro PROC proc 184 ALIGN 2 185 .globl KMP_PREFIX_UNDERSCORE(\proc) 186KMP_PREFIX_UNDERSCORE(\proc): 187 .cfi_startproc 188.endm 189# endif // KMP_OS_DARWIN 190 191# if KMP_OS_LINUX 192// BTI and PAC gnu property note 193# define NT_GNU_PROPERTY_TYPE_0 5 194# define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 195# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 196# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 197 198# define GNU_PROPERTY(type, value) \ 199 .pushsection .note.gnu.property, "a"; \ 200 .p2align 3; \ 201 .word 4; \ 202 .word 16; \ 203 .word NT_GNU_PROPERTY_TYPE_0; \ 204 .asciz "GNU"; \ 205 .word type; \ 206 .word 4; \ 207 .word value; \ 208 .word 0; \ 209 .popsection 210# endif 211 212# if defined(__ARM_FEATURE_BTI_DEFAULT) 213# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI 214# else 215# define BTI_FLAG 0 216# endif 217# if __ARM_FEATURE_PAC_DEFAULT & 3 218# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC 219# else 220# define PAC_FLAG 0 221# endif 222 223# if (BTI_FLAG | PAC_FLAG) != 0 224# if PAC_FLAG != 0 225# define PACBTI_C hint #25 226# define PACBTI_RET hint #29 227# else 228# define PACBTI_C hint #34 229# define PACBTI_RET 230# endif 231# define GNU_PROPERTY_BTI_PAC \ 232 GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) 233# else 234# define PACBTI_C 235# define PACBTI_RET 236# define GNU_PROPERTY_BTI_PAC 237# endif 238#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) 239 240.macro COMMON name, size, align_power 241#if KMP_OS_DARWIN 242 .comm \name, \size 243#elif KMP_OS_WINDOWS 244 .comm \name, \size, \align_power 245#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS 246 .comm \name, \size, (1<<(\align_power)) 247#endif 248.endm 249 250// ----------------------------------------------------------------------- 251// data 252// ----------------------------------------------------------------------- 253 254#ifdef KMP_GOMP_COMPAT 255 256// Support for unnamed common blocks. 257// 258// Because the symbol ".gomp_critical_user_" contains a ".", we have to 259// put this stuff in assembly. 260 261# if KMP_ARCH_X86 262# if KMP_OS_DARWIN 263 .data 264 .comm .gomp_critical_user_,32 265 .data 266 .globl ___kmp_unnamed_critical_addr 267___kmp_unnamed_critical_addr: 268 .long .gomp_critical_user_ 269# else /* Linux* OS */ 270 .data 271 .comm .gomp_critical_user_,32,8 272 .data 273 ALIGN 4 274 .global __kmp_unnamed_critical_addr 275__kmp_unnamed_critical_addr: 276 .4byte .gomp_critical_user_ 277 .type __kmp_unnamed_critical_addr,@object 278 .size __kmp_unnamed_critical_addr,4 279# endif /* KMP_OS_DARWIN */ 280# endif /* KMP_ARCH_X86 */ 281 282# if KMP_ARCH_X86_64 283# if KMP_OS_DARWIN 284 .data 285 .comm .gomp_critical_user_,32 286 .data 287 .globl ___kmp_unnamed_critical_addr 288___kmp_unnamed_critical_addr: 289 .quad .gomp_critical_user_ 290# else /* Linux* OS */ 291 .data 292 .comm .gomp_critical_user_,32,8 293 .data 294 ALIGN 8 295 .global __kmp_unnamed_critical_addr 296__kmp_unnamed_critical_addr: 297 .8byte .gomp_critical_user_ 298 .type __kmp_unnamed_critical_addr,@object 299 .size __kmp_unnamed_critical_addr,8 300# endif /* KMP_OS_DARWIN */ 301# endif /* KMP_ARCH_X86_64 */ 302 303#endif /* KMP_GOMP_COMPAT */ 304 305 306#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 307 308// ----------------------------------------------------------------------- 309// microtasking routines specifically written for IA-32 architecture 310// running Linux* OS 311// ----------------------------------------------------------------------- 312 313 .ident "Intel Corporation" 314 .data 315 ALIGN 4 316// void 317// __kmp_x86_pause( void ); 318 319 .text 320 PROC __kmp_x86_pause 321 322 pause_op 323 ret 324 325 DEBUG_INFO __kmp_x86_pause 326 327# if !KMP_ASM_INTRINS 328 329//------------------------------------------------------------------------ 330// kmp_int32 331// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 332 333 PROC __kmp_test_then_add32 334 335 movl 4(%esp), %ecx 336 movl 8(%esp), %eax 337 lock 338 xaddl %eax,(%ecx) 339 ret 340 341 DEBUG_INFO __kmp_test_then_add32 342 343//------------------------------------------------------------------------ 344// FUNCTION __kmp_xchg_fixed8 345// 346// kmp_int32 347// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 348// 349// parameters: 350// p: 4(%esp) 351// d: 8(%esp) 352// 353// return: %al 354 PROC __kmp_xchg_fixed8 355 356 movl 4(%esp), %ecx // "p" 357 movb 8(%esp), %al // "d" 358 359 lock 360 xchgb %al,(%ecx) 361 ret 362 363 DEBUG_INFO __kmp_xchg_fixed8 364 365 366//------------------------------------------------------------------------ 367// FUNCTION __kmp_xchg_fixed16 368// 369// kmp_int16 370// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 371// 372// parameters: 373// p: 4(%esp) 374// d: 8(%esp) 375// return: %ax 376 PROC __kmp_xchg_fixed16 377 378 movl 4(%esp), %ecx // "p" 379 movw 8(%esp), %ax // "d" 380 381 lock 382 xchgw %ax,(%ecx) 383 ret 384 385 DEBUG_INFO __kmp_xchg_fixed16 386 387 388//------------------------------------------------------------------------ 389// FUNCTION __kmp_xchg_fixed32 390// 391// kmp_int32 392// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 393// 394// parameters: 395// p: 4(%esp) 396// d: 8(%esp) 397// 398// return: %eax 399 PROC __kmp_xchg_fixed32 400 401 movl 4(%esp), %ecx // "p" 402 movl 8(%esp), %eax // "d" 403 404 lock 405 xchgl %eax,(%ecx) 406 ret 407 408 DEBUG_INFO __kmp_xchg_fixed32 409 410 411// kmp_int8 412// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 413 PROC __kmp_compare_and_store8 414 415 movl 4(%esp), %ecx 416 movb 8(%esp), %al 417 movb 12(%esp), %dl 418 lock 419 cmpxchgb %dl,(%ecx) 420 sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 421 and $1, %eax // sign extend previous instruction 422 ret 423 424 DEBUG_INFO __kmp_compare_and_store8 425 426// kmp_int16 427// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); 428 PROC __kmp_compare_and_store16 429 430 movl 4(%esp), %ecx 431 movw 8(%esp), %ax 432 movw 12(%esp), %dx 433 lock 434 cmpxchgw %dx,(%ecx) 435 sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 436 and $1, %eax // sign extend previous instruction 437 ret 438 439 DEBUG_INFO __kmp_compare_and_store16 440 441// kmp_int32 442// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); 443 PROC __kmp_compare_and_store32 444 445 movl 4(%esp), %ecx 446 movl 8(%esp), %eax 447 movl 12(%esp), %edx 448 lock 449 cmpxchgl %edx,(%ecx) 450 sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 451 and $1, %eax // sign extend previous instruction 452 ret 453 454 DEBUG_INFO __kmp_compare_and_store32 455 456// kmp_int32 457// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); 458 PROC __kmp_compare_and_store64 459 460 pushl %ebp 461 movl %esp, %ebp 462 pushl %ebx 463 pushl %edi 464 movl 8(%ebp), %edi 465 movl 12(%ebp), %eax // "cv" low order word 466 movl 16(%ebp), %edx // "cv" high order word 467 movl 20(%ebp), %ebx // "sv" low order word 468 movl 24(%ebp), %ecx // "sv" high order word 469 lock 470 cmpxchg8b (%edi) 471 sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 472 and $1, %eax // sign extend previous instruction 473 popl %edi 474 popl %ebx 475 movl %ebp, %esp 476 popl %ebp 477 ret 478 479 DEBUG_INFO __kmp_compare_and_store64 480 481// kmp_int8 482// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); 483 PROC __kmp_compare_and_store_ret8 484 485 movl 4(%esp), %ecx 486 movb 8(%esp), %al 487 movb 12(%esp), %dl 488 lock 489 cmpxchgb %dl,(%ecx) 490 ret 491 492 DEBUG_INFO __kmp_compare_and_store_ret8 493 494// kmp_int16 495// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, 496// kmp_int16 sv); 497 PROC __kmp_compare_and_store_ret16 498 499 movl 4(%esp), %ecx 500 movw 8(%esp), %ax 501 movw 12(%esp), %dx 502 lock 503 cmpxchgw %dx,(%ecx) 504 ret 505 506 DEBUG_INFO __kmp_compare_and_store_ret16 507 508// kmp_int32 509// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, 510// kmp_int32 sv); 511 PROC __kmp_compare_and_store_ret32 512 513 movl 4(%esp), %ecx 514 movl 8(%esp), %eax 515 movl 12(%esp), %edx 516 lock 517 cmpxchgl %edx,(%ecx) 518 ret 519 520 DEBUG_INFO __kmp_compare_and_store_ret32 521 522// kmp_int64 523// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, 524// kmp_int64 sv); 525 PROC __kmp_compare_and_store_ret64 526 527 pushl %ebp 528 movl %esp, %ebp 529 pushl %ebx 530 pushl %edi 531 movl 8(%ebp), %edi 532 movl 12(%ebp), %eax // "cv" low order word 533 movl 16(%ebp), %edx // "cv" high order word 534 movl 20(%ebp), %ebx // "sv" low order word 535 movl 24(%ebp), %ecx // "sv" high order word 536 lock 537 cmpxchg8b (%edi) 538 popl %edi 539 popl %ebx 540 movl %ebp, %esp 541 popl %ebp 542 ret 543 544 DEBUG_INFO __kmp_compare_and_store_ret64 545 546 547//------------------------------------------------------------------------ 548// FUNCTION __kmp_xchg_real32 549// 550// kmp_real32 551// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 552// 553// parameters: 554// addr: 4(%esp) 555// data: 8(%esp) 556// 557// return: %eax 558 PROC __kmp_xchg_real32 559 560 pushl %ebp 561 movl %esp, %ebp 562 subl $4, %esp 563 pushl %esi 564 565 movl 4(%ebp), %esi 566 flds (%esi) 567 // load <addr> 568 fsts -4(%ebp) 569 // store old value 570 571 movl 8(%ebp), %eax 572 573 lock 574 xchgl %eax, (%esi) 575 576 flds -4(%ebp) 577 // return old value 578 579 popl %esi 580 movl %ebp, %esp 581 popl %ebp 582 ret 583 584 DEBUG_INFO __kmp_xchg_real32 585 586# endif /* !KMP_ASM_INTRINS */ 587 588//------------------------------------------------------------------------ 589// int 590// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 591// int gtid, int tid, 592// int argc, void *p_argv[] 593// #if OMPT_SUPPORT 594// , 595// void **exit_frame_ptr 596// #endif 597// ) { 598// #if OMPT_SUPPORT 599// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 600// #endif 601// 602// (*pkfn)( & gtid, & tid, argv[0], ... ); 603// return 1; 604// } 605 606// -- Begin __kmp_invoke_microtask 607// mark_begin; 608 PROC __kmp_invoke_microtask 609 610 pushl %ebp 611 KMP_CFI_DEF_OFFSET 8 612 KMP_CFI_OFFSET ebp,-8 613 movl %esp,%ebp // establish the base pointer for this routine. 614 KMP_CFI_REGISTER ebp 615 subl $8,%esp // allocate space for two local variables. 616 // These varibales are: 617 // argv: -4(%ebp) 618 // temp: -8(%ebp) 619 // 620 pushl %ebx // save %ebx to use during this routine 621 // 622#if OMPT_SUPPORT 623 movl 28(%ebp),%ebx // get exit_frame address 624 movl %ebp,(%ebx) // save exit_frame 625#endif 626 627 movl 20(%ebp),%ebx // Stack alignment - # args 628 addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) 629 shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 630 movl %esp,%eax // 631 subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this 632 movl %eax,%ebx // Save to %ebx 633 andl $0xFFFFFF80,%eax // mask off 7 bits 634 subl %eax,%ebx // Amount to subtract from %esp 635 subl %ebx,%esp // Prepare the stack ptr -- 636 // now it will be aligned on 128-byte boundary at the call 637 638 movl 24(%ebp),%eax // copy from p_argv[] 639 movl %eax,-4(%ebp) // into the local variable *argv. 640 641 movl 20(%ebp),%ebx // argc is 20(%ebp) 642 shll $2,%ebx 643 644KMP_LABEL(invoke_2): 645 cmpl $0,%ebx 646 jg KMP_LABEL(invoke_4) 647 jmp KMP_LABEL(invoke_3) 648 ALIGN 2 649KMP_LABEL(invoke_4): 650 movl -4(%ebp),%eax 651 subl $4,%ebx // decrement argc. 652 addl %ebx,%eax // index into argv. 653 movl (%eax),%edx 654 pushl %edx 655 656 jmp KMP_LABEL(invoke_2) 657 ALIGN 2 658KMP_LABEL(invoke_3): 659 leal 16(%ebp),%eax // push & tid 660 pushl %eax 661 662 leal 12(%ebp),%eax // push & gtid 663 pushl %eax 664 665 movl 8(%ebp),%ebx 666 call *%ebx // call (*pkfn)(); 667 668 movl $1,%eax // return 1; 669 670 movl -12(%ebp),%ebx // restore %ebx 671 leave 672 KMP_CFI_DEF esp,4 673 ret 674 675 DEBUG_INFO __kmp_invoke_microtask 676// -- End __kmp_invoke_microtask 677 678 679// kmp_uint64 680// __kmp_hardware_timestamp(void) 681 PROC __kmp_hardware_timestamp 682 rdtsc 683 ret 684 685 DEBUG_INFO __kmp_hardware_timestamp 686// -- End __kmp_hardware_timestamp 687 688#endif /* KMP_ARCH_X86 */ 689 690 691#if KMP_ARCH_X86_64 692 693// ----------------------------------------------------------------------- 694// microtasking routines specifically written for IA-32 architecture and 695// Intel(R) 64 running Linux* OS 696// ----------------------------------------------------------------------- 697 698// -- Machine type P 699// mark_description "Intel Corporation"; 700 .ident "Intel Corporation" 701// -- .file "z_Linux_asm.S" 702 .data 703 ALIGN 4 704 705// To prevent getting our code into .data section .text added to every routine 706// definition for x86_64. 707//------------------------------------------------------------------------ 708# if !KMP_ASM_INTRINS 709 710//------------------------------------------------------------------------ 711// FUNCTION __kmp_test_then_add32 712// 713// kmp_int32 714// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 715// 716// parameters: 717// p: %rdi 718// d: %esi 719// 720// return: %eax 721 .text 722 PROC __kmp_test_then_add32 723 724 movl %esi, %eax // "d" 725 lock 726 xaddl %eax,(%rdi) 727 ret 728 729 DEBUG_INFO __kmp_test_then_add32 730 731 732//------------------------------------------------------------------------ 733// FUNCTION __kmp_test_then_add64 734// 735// kmp_int64 736// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); 737// 738// parameters: 739// p: %rdi 740// d: %rsi 741// return: %rax 742 .text 743 PROC __kmp_test_then_add64 744 745 movq %rsi, %rax // "d" 746 lock 747 xaddq %rax,(%rdi) 748 ret 749 750 DEBUG_INFO __kmp_test_then_add64 751 752 753//------------------------------------------------------------------------ 754// FUNCTION __kmp_xchg_fixed8 755// 756// kmp_int32 757// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 758// 759// parameters: 760// p: %rdi 761// d: %sil 762// 763// return: %al 764 .text 765 PROC __kmp_xchg_fixed8 766 767 movb %sil, %al // "d" 768 769 lock 770 xchgb %al,(%rdi) 771 ret 772 773 DEBUG_INFO __kmp_xchg_fixed8 774 775 776//------------------------------------------------------------------------ 777// FUNCTION __kmp_xchg_fixed16 778// 779// kmp_int16 780// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 781// 782// parameters: 783// p: %rdi 784// d: %si 785// return: %ax 786 .text 787 PROC __kmp_xchg_fixed16 788 789 movw %si, %ax // "d" 790 791 lock 792 xchgw %ax,(%rdi) 793 ret 794 795 DEBUG_INFO __kmp_xchg_fixed16 796 797 798//------------------------------------------------------------------------ 799// FUNCTION __kmp_xchg_fixed32 800// 801// kmp_int32 802// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 803// 804// parameters: 805// p: %rdi 806// d: %esi 807// 808// return: %eax 809 .text 810 PROC __kmp_xchg_fixed32 811 812 movl %esi, %eax // "d" 813 814 lock 815 xchgl %eax,(%rdi) 816 ret 817 818 DEBUG_INFO __kmp_xchg_fixed32 819 820 821//------------------------------------------------------------------------ 822// FUNCTION __kmp_xchg_fixed64 823// 824// kmp_int64 825// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); 826// 827// parameters: 828// p: %rdi 829// d: %rsi 830// return: %rax 831 .text 832 PROC __kmp_xchg_fixed64 833 834 movq %rsi, %rax // "d" 835 836 lock 837 xchgq %rax,(%rdi) 838 ret 839 840 DEBUG_INFO __kmp_xchg_fixed64 841 842 843//------------------------------------------------------------------------ 844// FUNCTION __kmp_compare_and_store8 845// 846// kmp_int8 847// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 848// 849// parameters: 850// p: %rdi 851// cv: %esi 852// sv: %edx 853// 854// return: %eax 855 .text 856 PROC __kmp_compare_and_store8 857 858 movb %sil, %al // "cv" 859 lock 860 cmpxchgb %dl,(%rdi) 861 sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 862 andq $1, %rax // sign extend previous instruction for return value 863 ret 864 865 DEBUG_INFO __kmp_compare_and_store8 866 867 868//------------------------------------------------------------------------ 869// FUNCTION __kmp_compare_and_store16 870// 871// kmp_int16 872// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 873// 874// parameters: 875// p: %rdi 876// cv: %si 877// sv: %dx 878// 879// return: %eax 880 .text 881 PROC __kmp_compare_and_store16 882 883 movw %si, %ax // "cv" 884 lock 885 cmpxchgw %dx,(%rdi) 886 sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 887 andq $1, %rax // sign extend previous instruction for return value 888 ret 889 890 DEBUG_INFO __kmp_compare_and_store16 891 892 893//------------------------------------------------------------------------ 894// FUNCTION __kmp_compare_and_store32 895// 896// kmp_int32 897// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 898// 899// parameters: 900// p: %rdi 901// cv: %esi 902// sv: %edx 903// 904// return: %eax 905 .text 906 PROC __kmp_compare_and_store32 907 908 movl %esi, %eax // "cv" 909 lock 910 cmpxchgl %edx,(%rdi) 911 sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 912 andq $1, %rax // sign extend previous instruction for return value 913 ret 914 915 DEBUG_INFO __kmp_compare_and_store32 916 917 918//------------------------------------------------------------------------ 919// FUNCTION __kmp_compare_and_store64 920// 921// kmp_int32 922// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 923// 924// parameters: 925// p: %rdi 926// cv: %rsi 927// sv: %rdx 928// return: %eax 929 .text 930 PROC __kmp_compare_and_store64 931 932 movq %rsi, %rax // "cv" 933 lock 934 cmpxchgq %rdx,(%rdi) 935 sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 936 andq $1, %rax // sign extend previous instruction for return value 937 ret 938 939 DEBUG_INFO __kmp_compare_and_store64 940 941//------------------------------------------------------------------------ 942// FUNCTION __kmp_compare_and_store_ret8 943// 944// kmp_int8 945// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 946// 947// parameters: 948// p: %rdi 949// cv: %esi 950// sv: %edx 951// 952// return: %eax 953 .text 954 PROC __kmp_compare_and_store_ret8 955 956 movb %sil, %al // "cv" 957 lock 958 cmpxchgb %dl,(%rdi) 959 ret 960 961 DEBUG_INFO __kmp_compare_and_store_ret8 962 963 964//------------------------------------------------------------------------ 965// FUNCTION __kmp_compare_and_store_ret16 966// 967// kmp_int16 968// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 969// 970// parameters: 971// p: %rdi 972// cv: %si 973// sv: %dx 974// 975// return: %eax 976 .text 977 PROC __kmp_compare_and_store_ret16 978 979 movw %si, %ax // "cv" 980 lock 981 cmpxchgw %dx,(%rdi) 982 ret 983 984 DEBUG_INFO __kmp_compare_and_store_ret16 985 986 987//------------------------------------------------------------------------ 988// FUNCTION __kmp_compare_and_store_ret32 989// 990// kmp_int32 991// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 992// 993// parameters: 994// p: %rdi 995// cv: %esi 996// sv: %edx 997// 998// return: %eax 999 .text 1000 PROC __kmp_compare_and_store_ret32 1001 1002 movl %esi, %eax // "cv" 1003 lock 1004 cmpxchgl %edx,(%rdi) 1005 ret 1006 1007 DEBUG_INFO __kmp_compare_and_store_ret32 1008 1009 1010//------------------------------------------------------------------------ 1011// FUNCTION __kmp_compare_and_store_ret64 1012// 1013// kmp_int64 1014// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 1015// 1016// parameters: 1017// p: %rdi 1018// cv: %rsi 1019// sv: %rdx 1020// return: %eax 1021 .text 1022 PROC __kmp_compare_and_store_ret64 1023 1024 movq %rsi, %rax // "cv" 1025 lock 1026 cmpxchgq %rdx,(%rdi) 1027 ret 1028 1029 DEBUG_INFO __kmp_compare_and_store_ret64 1030 1031# endif /* !KMP_ASM_INTRINS */ 1032 1033 1034# if !KMP_MIC 1035 1036# if !KMP_ASM_INTRINS 1037 1038//------------------------------------------------------------------------ 1039// FUNCTION __kmp_xchg_real32 1040// 1041// kmp_real32 1042// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 1043// 1044// parameters: 1045// addr: %rdi 1046// data: %xmm0 (lower 4 bytes) 1047// 1048// return: %xmm0 (lower 4 bytes) 1049 .text 1050 PROC __kmp_xchg_real32 1051 1052 movd %xmm0, %eax // load "data" to eax 1053 1054 lock 1055 xchgl %eax, (%rdi) 1056 1057 movd %eax, %xmm0 // load old value into return register 1058 1059 ret 1060 1061 DEBUG_INFO __kmp_xchg_real32 1062 1063 1064//------------------------------------------------------------------------ 1065// FUNCTION __kmp_xchg_real64 1066// 1067// kmp_real64 1068// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); 1069// 1070// parameters: 1071// addr: %rdi 1072// data: %xmm0 (lower 8 bytes) 1073// return: %xmm0 (lower 8 bytes) 1074 .text 1075 PROC __kmp_xchg_real64 1076 1077 movd %xmm0, %rax // load "data" to rax 1078 1079 lock 1080 xchgq %rax, (%rdi) 1081 1082 movd %rax, %xmm0 // load old value into return register 1083 ret 1084 1085 DEBUG_INFO __kmp_xchg_real64 1086 1087 1088# endif /* !KMP_MIC */ 1089 1090# endif /* !KMP_ASM_INTRINS */ 1091 1092//------------------------------------------------------------------------ 1093// int 1094// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1095// int gtid, int tid, 1096// int argc, void *p_argv[] 1097// #if OMPT_SUPPORT 1098// , 1099// void **exit_frame_ptr 1100// #endif 1101// ) { 1102// #if OMPT_SUPPORT 1103// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1104// #endif 1105// 1106// (*pkfn)( & gtid, & tid, argv[0], ... ); 1107// return 1; 1108// } 1109// 1110// note: at call to pkfn must have %rsp 128-byte aligned for compiler 1111// 1112// parameters: 1113// %rdi: pkfn 1114// %esi: gtid 1115// %edx: tid 1116// %ecx: argc 1117// %r8: p_argv 1118// %r9: &exit_frame 1119// 1120// locals: 1121// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1122// __tid: tid parm pushed on stack so can pass &tid to pkfn 1123// 1124// reg temps: 1125// %rax: used all over the place 1126// %rdx: used in stack pointer alignment calculation 1127// %r11: used to traverse p_argv array 1128// %rsi: used as temporary for stack parameters 1129// used as temporary for number of pkfn parms to push 1130// %rbx: used to hold pkfn address, and zero constant, callee-save 1131// 1132// return: %eax (always 1/TRUE) 1133__gtid = -16 1134__tid = -24 1135 1136// -- Begin __kmp_invoke_microtask 1137// mark_begin; 1138 .text 1139 PROC __kmp_invoke_microtask 1140 1141 pushq %rbp // save base pointer 1142 KMP_CFI_DEF_OFFSET 16 1143 KMP_CFI_OFFSET rbp,-16 1144 movq %rsp,%rbp // establish the base pointer for this routine. 1145 KMP_CFI_REGISTER rbp 1146 1147#if OMPT_SUPPORT 1148 movq %rbp, (%r9) // save exit_frame 1149#endif 1150 1151 pushq %rbx // %rbx is callee-saved register 1152 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn 1153 pushq %rdx // Put tid on stack so can pass &tid to pkfn 1154 1155 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax 1156 movq $0, %rbx // constant for cmovs later 1157 subq $4, %rax // subtract four args passed in registers to pkfn 1158#if KMP_MIC 1159 js KMP_LABEL(kmp_0) // jump to movq 1160 jmp KMP_LABEL(kmp_0_exit) // jump ahead 1161KMP_LABEL(kmp_0): 1162 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1163KMP_LABEL(kmp_0_exit): 1164#else 1165 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1166#endif // KMP_MIC 1167 1168 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later 1169 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 1170 1171 movq %rsp, %rdx // 1172 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- 1173 // without align, stack ptr would be this 1174 movq %rdx, %rax // Save to %rax 1175 1176 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) 1177 subq %rax, %rdx // Amount to subtract from %rsp 1178 subq %rdx, %rsp // Prepare the stack ptr -- 1179 // now %rsp will align to 128-byte boundary at call site 1180 1181 // setup pkfn parameter reg and stack 1182 movq %rcx, %rax // argc -> %rax 1183 cmpq $0, %rsi 1184 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push 1185 shlq $3, %rcx // argc*8 -> %rcx 1186 movq %r8, %rdx // p_argv -> %rdx 1187 addq %rcx, %rdx // &p_argv[argc] -> %rdx 1188 1189 movq %rsi, %rcx // max (0, argc-4) -> %rcx 1190 1191KMP_LABEL(kmp_invoke_push_parms): 1192 // push nth - 7th parms to pkfn on stack 1193 subq $8, %rdx // decrement p_argv pointer to previous parm 1194 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi 1195 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) 1196 subl $1, %ecx 1197 1198// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e 1199// if the name of the label that is an operand of this jecxz starts with a dot ("."); 1200// Apple's linker does not support 1-byte length relocation; 1201// Resolution: replace all .labelX entries with L_labelX. 1202 1203 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left 1204 jmp KMP_LABEL(kmp_invoke_push_parms) 1205 ALIGN 3 1206KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. 1207 // order here is important to avoid trashing 1208 // registers used for both input and output parms! 1209 movq %rdi, %rbx // pkfn -> %rbx 1210 leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) 1211 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) 1212 // Check if argc is 0 1213 cmpq $0, %rax 1214 je KMP_LABEL(kmp_no_args) // Jump ahead 1215 1216 movq %r8, %r11 // p_argv -> %r11 1217 1218#if KMP_MIC 1219 cmpq $4, %rax // argc >= 4? 1220 jns KMP_LABEL(kmp_4) // jump to movq 1221 jmp KMP_LABEL(kmp_4_exit) // jump ahead 1222KMP_LABEL(kmp_4): 1223 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1224KMP_LABEL(kmp_4_exit): 1225 1226 cmpq $3, %rax // argc >= 3? 1227 jns KMP_LABEL(kmp_3) // jump to movq 1228 jmp KMP_LABEL(kmp_3_exit) // jump ahead 1229KMP_LABEL(kmp_3): 1230 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1231KMP_LABEL(kmp_3_exit): 1232 1233 cmpq $2, %rax // argc >= 2? 1234 jns KMP_LABEL(kmp_2) // jump to movq 1235 jmp KMP_LABEL(kmp_2_exit) // jump ahead 1236KMP_LABEL(kmp_2): 1237 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1238KMP_LABEL(kmp_2_exit): 1239 1240 cmpq $1, %rax // argc >= 1? 1241 jns KMP_LABEL(kmp_1) // jump to movq 1242 jmp KMP_LABEL(kmp_1_exit) // jump ahead 1243KMP_LABEL(kmp_1): 1244 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1245KMP_LABEL(kmp_1_exit): 1246#else 1247 cmpq $4, %rax // argc >= 4? 1248 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1249 1250 cmpq $3, %rax // argc >= 3? 1251 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1252 1253 cmpq $2, %rax // argc >= 2? 1254 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1255 1256 cmpq $1, %rax // argc >= 1? 1257 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1258#endif // KMP_MIC 1259 1260KMP_LABEL(kmp_no_args): 1261 call *%rbx // call (*pkfn)(); 1262 movq $1, %rax // move 1 into return register; 1263 1264 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified 1265 movq %rbp, %rsp // restore stack pointer 1266 popq %rbp // restore frame pointer 1267 KMP_CFI_DEF rsp,8 1268 ret 1269 1270 DEBUG_INFO __kmp_invoke_microtask 1271// -- End __kmp_invoke_microtask 1272 1273// kmp_uint64 1274// __kmp_hardware_timestamp(void) 1275 .text 1276 PROC __kmp_hardware_timestamp 1277 rdtsc 1278 shlq $32, %rdx 1279 orq %rdx, %rax 1280 ret 1281 1282 DEBUG_INFO __kmp_hardware_timestamp 1283// -- End __kmp_hardware_timestamp 1284 1285//------------------------------------------------------------------------ 1286// FUNCTION __kmp_bsr32 1287// 1288// int 1289// __kmp_bsr32( int ); 1290 .text 1291 PROC __kmp_bsr32 1292 1293 bsr %edi,%eax 1294 ret 1295 1296 DEBUG_INFO __kmp_bsr32 1297 1298// ----------------------------------------------------------------------- 1299#endif /* KMP_ARCH_X86_64 */ 1300 1301// ' 1302#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) 1303 1304//------------------------------------------------------------------------ 1305// int 1306// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1307// int gtid, int tid, 1308// int argc, void *p_argv[] 1309// #if OMPT_SUPPORT 1310// , 1311// void **exit_frame_ptr 1312// #endif 1313// ) { 1314// #if OMPT_SUPPORT 1315// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1316// #endif 1317// 1318// (*pkfn)( & gtid, & tid, argv[0], ... ); 1319// 1320// // FIXME: This is done at call-site and can be removed here. 1321// #if OMPT_SUPPORT 1322// *exit_frame_ptr = 0; 1323// #endif 1324// 1325// return 1; 1326// } 1327// 1328// parameters: 1329// x0: pkfn 1330// w1: gtid 1331// w2: tid 1332// w3: argc 1333// x4: p_argv 1334// x5: &exit_frame 1335// 1336// locals: 1337// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1338// __tid: tid parm pushed on stack so can pass &tid to pkfn 1339// 1340// reg temps: 1341// x8: used to hold pkfn address 1342// w9: used as temporary for number of pkfn parms 1343// x10: used to traverse p_argv array 1344// x11: used as temporary for stack placement calculation 1345// x12: used as temporary for stack parameters 1346// x19: used to preserve exit_frame_ptr, callee-save 1347// 1348// return: w0 (always 1/TRUE) 1349// 1350 1351__gtid = 4 1352__tid = 8 1353 1354// -- Begin __kmp_invoke_microtask 1355// mark_begin; 1356 .text 1357 PROC __kmp_invoke_microtask 1358 PACBTI_C 1359 1360 stp x29, x30, [sp, #-16]! 1361# if OMPT_SUPPORT 1362 stp x19, x20, [sp, #-16]! 1363# endif 1364 mov x29, sp 1365 1366 orr w9, wzr, #1 1367 add w9, w9, w3, lsr #1 1368 sub sp, sp, w9, uxtw #4 1369 mov x11, sp 1370 1371 mov x8, x0 1372 str w1, [x29, #-__gtid] 1373 str w2, [x29, #-__tid] 1374 mov w9, w3 1375 mov x10, x4 1376# if OMPT_SUPPORT 1377 mov x19, x5 1378 str x29, [x19] 1379# endif 1380 1381 sub x0, x29, #__gtid 1382 sub x1, x29, #__tid 1383 1384 cbz w9, KMP_LABEL(kmp_1) 1385 ldr x2, [x10] 1386 1387 sub w9, w9, #1 1388 cbz w9, KMP_LABEL(kmp_1) 1389 ldr x3, [x10, #8]! 1390 1391 sub w9, w9, #1 1392 cbz w9, KMP_LABEL(kmp_1) 1393 ldr x4, [x10, #8]! 1394 1395 sub w9, w9, #1 1396 cbz w9, KMP_LABEL(kmp_1) 1397 ldr x5, [x10, #8]! 1398 1399 sub w9, w9, #1 1400 cbz w9, KMP_LABEL(kmp_1) 1401 ldr x6, [x10, #8]! 1402 1403 sub w9, w9, #1 1404 cbz w9, KMP_LABEL(kmp_1) 1405 ldr x7, [x10, #8]! 1406 1407KMP_LABEL(kmp_0): 1408 sub w9, w9, #1 1409 cbz w9, KMP_LABEL(kmp_1) 1410 ldr x12, [x10, #8]! 1411 str x12, [x11], #8 1412 b KMP_LABEL(kmp_0) 1413KMP_LABEL(kmp_1): 1414 blr x8 1415 orr w0, wzr, #1 1416 mov sp, x29 1417# if OMPT_SUPPORT 1418 str xzr, [x19] 1419 ldp x19, x20, [sp], #16 1420# endif 1421 ldp x29, x30, [sp], #16 1422 PACBTI_RET 1423 ret 1424 1425 DEBUG_INFO __kmp_invoke_microtask 1426// -- End __kmp_invoke_microtask 1427 1428#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */ 1429 1430#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM 1431 1432//------------------------------------------------------------------------ 1433// int 1434// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1435// int gtid, int tid, 1436// int argc, void *p_argv[] 1437// #if OMPT_SUPPORT 1438// , 1439// void **exit_frame_ptr 1440// #endif 1441// ) { 1442// #if OMPT_SUPPORT 1443// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1444// #endif 1445// 1446// (*pkfn)( & gtid, & tid, argv[0], ... ); 1447// 1448// // FIXME: This is done at call-site and can be removed here. 1449// #if OMPT_SUPPORT 1450// *exit_frame_ptr = 0; 1451// #endif 1452// 1453// return 1; 1454// } 1455// 1456// parameters: 1457// r0: pkfn 1458// r1: gtid 1459// r2: tid 1460// r3: argc 1461// r4(stack): p_argv 1462// r5(stack): &exit_frame 1463// 1464// locals: 1465// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1466// __tid: tid parm pushed on stack so can pass &tid to pkfn 1467// 1468// reg temps: 1469// r4: used to hold pkfn address 1470// r5: used as temporary for number of pkfn parms 1471// r6: used to traverse p_argv array 1472// r7: frame pointer (in some configurations) 1473// r8: used as temporary for stack placement calculation 1474// and as pointer to base of callee saved area 1475// r9: used as temporary for stack parameters 1476// r10: used to preserve exit_frame_ptr, callee-save 1477// r11: frame pointer (in some configurations) 1478// 1479// return: r0 (always 1/TRUE) 1480// 1481 1482__gtid = 4 1483__tid = 8 1484 1485// -- Begin __kmp_invoke_microtask 1486// mark_begin; 1487 .text 1488 PROC __kmp_invoke_microtask 1489 1490 // Pushing one extra register (r3) to keep the stack aligned 1491 // for when we call pkfn below 1492 push {r3-r11,lr} 1493 // Load p_argv and &exit_frame 1494 ldr r4, [sp, #10*4] 1495# if OMPT_SUPPORT 1496 ldr r5, [sp, #11*4] 1497# endif 1498 1499# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS) 1500# define FP r7 1501# define FPOFF 4*4 1502#else 1503# define FP r11 1504# define FPOFF 8*4 1505#endif 1506 add FP, sp, #FPOFF 1507# if OMPT_SUPPORT 1508 mov r10, r5 1509 str FP, [r10] 1510# endif 1511 mov r8, sp 1512 1513 // Calculate how much stack to allocate, in increments of 8 bytes. 1514 // We strictly need 4*(argc-2) bytes (2 arguments are passed in 1515 // registers) but allocate 4*argc for simplicity (to avoid needing 1516 // to handle the argc<2 cases). We align the number of bytes 1517 // allocated to 8 bytes, to keep the stack aligned. (Since we 1518 // already allocate more than enough, it's ok to round down 1519 // instead of up for the alignment.) We allocate another extra 1520 // 8 bytes for gtid and tid. 1521 mov r5, #1 1522 add r5, r5, r3, lsr #1 1523 sub sp, sp, r5, lsl #3 1524 1525 str r1, [r8, #-__gtid] 1526 str r2, [r8, #-__tid] 1527 mov r5, r3 1528 mov r6, r4 1529 mov r4, r0 1530 1531 // Prepare the first 2 parameters to pkfn - pointers to gtid and tid 1532 // in our stack frame. 1533 sub r0, r8, #__gtid 1534 sub r1, r8, #__tid 1535 1536 mov r8, sp 1537 1538 // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2 1539 cmp r5, #0 1540 beq KMP_LABEL(kmp_1) 1541 ldr r2, [r6] 1542 1543 subs r5, r5, #1 1544 beq KMP_LABEL(kmp_1) 1545 ldr r3, [r6, #4]! 1546 1547 // Loop, loading the rest of p_argv and writing the elements on the 1548 // stack. 1549KMP_LABEL(kmp_0): 1550 subs r5, r5, #1 1551 beq KMP_LABEL(kmp_1) 1552 ldr r12, [r6, #4]! 1553 str r12, [r8], #4 1554 b KMP_LABEL(kmp_0) 1555KMP_LABEL(kmp_1): 1556 blx r4 1557 mov r0, #1 1558 1559 sub r4, FP, #FPOFF 1560 mov sp, r4 1561# undef FP 1562# undef FPOFF 1563 1564# if OMPT_SUPPORT 1565 mov r1, #0 1566 str r1, [r10] 1567# endif 1568 pop {r3-r11,pc} 1569 1570 DEBUG_INFO __kmp_invoke_microtask 1571// -- End __kmp_invoke_microtask 1572 1573#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */ 1574 1575#if KMP_ARCH_PPC64 1576 1577//------------------------------------------------------------------------ 1578// int 1579// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1580// int gtid, int tid, 1581// int argc, void *p_argv[] 1582// #if OMPT_SUPPORT 1583// , 1584// void **exit_frame_ptr 1585// #endif 1586// ) { 1587// #if OMPT_SUPPORT 1588// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1589// #endif 1590// 1591// (*pkfn)( & gtid, & tid, argv[0], ... ); 1592// 1593// // FIXME: This is done at call-site and can be removed here. 1594// #if OMPT_SUPPORT 1595// *exit_frame_ptr = 0; 1596// #endif 1597// 1598// return 1; 1599// } 1600// 1601// parameters: 1602// r3: pkfn 1603// r4: gtid 1604// r5: tid 1605// r6: argc 1606// r7: p_argv 1607// r8: &exit_frame 1608// 1609// return: r3 (always 1/TRUE) 1610// 1611 .text 1612# if KMP_ARCH_PPC64_ELFv2 1613 .abiversion 2 1614# endif 1615 .globl __kmp_invoke_microtask 1616 1617# if KMP_ARCH_PPC64_ELFv2 1618 .p2align 4 1619# else 1620 .p2align 2 1621# endif 1622 1623 .type __kmp_invoke_microtask,@function 1624 1625# if KMP_ARCH_PPC64_ELFv2 1626__kmp_invoke_microtask: 1627.Lfunc_begin0: 1628.Lfunc_gep0: 1629 addis 2, 12, .TOC.-.Lfunc_gep0@ha 1630 addi 2, 2, .TOC.-.Lfunc_gep0@l 1631.Lfunc_lep0: 1632 .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 1633# else 1634 .section .opd,"aw",@progbits 1635__kmp_invoke_microtask: 1636 .p2align 3 1637 .quad .Lfunc_begin0 1638 .quad .TOC.@tocbase 1639 .quad 0 1640 .text 1641.Lfunc_begin0: 1642# endif 1643 1644// -- Begin __kmp_invoke_microtask 1645// mark_begin; 1646 1647// We need to allocate a stack frame large enough to hold all of the parameters 1648// on the stack for the microtask plus what this function needs. That's 48 1649// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the 1650// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, 1651// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes 1652// to save r30 to hold a copy of r8. 1653 1654 .cfi_startproc 1655 mflr 0 1656 std 31, -8(1) 1657 std 0, 16(1) 1658 1659// This is unusual because normally we'd set r31 equal to r1 after the stack 1660// frame is established. In this case, however, we need to dynamically compute 1661// the stack frame size, and so we keep a direct copy of r1 to access our 1662// register save areas and restore the r1 value before returning. 1663 mr 31, 1 1664 .cfi_def_cfa_register r31 1665 .cfi_offset r31, -8 1666 .cfi_offset lr, 16 1667 1668// Compute the size necessary for the local stack frame. 1669# if KMP_ARCH_PPC64_ELFv2 1670 li 12, 72 1671# else 1672 li 12, 88 1673# endif 1674 sldi 0, 6, 3 1675 add 12, 0, 12 1676 neg 12, 12 1677 1678// We need to make sure that the stack frame stays aligned (to 16 bytes). 1679 li 0, -16 1680 and 12, 0, 12 1681 1682// Establish the local stack frame. 1683 stdux 1, 1, 12 1684 1685# if OMPT_SUPPORT 1686 .cfi_offset r30, -16 1687 std 30, -16(31) 1688 std 1, 0(8) 1689 mr 30, 8 1690# endif 1691 1692// Store gtid and tid to the stack because they're passed by reference to the microtask. 1693 stw 4, -20(31) 1694 stw 5, -24(31) 1695 1696 mr 12, 6 1697 mr 4, 7 1698 1699 cmpwi 0, 12, 1 1700 blt 0, .Lcall 1701 1702 ld 5, 0(4) 1703 1704 cmpwi 0, 12, 2 1705 blt 0, .Lcall 1706 1707 ld 6, 8(4) 1708 1709 cmpwi 0, 12, 3 1710 blt 0, .Lcall 1711 1712 ld 7, 16(4) 1713 1714 cmpwi 0, 12, 4 1715 blt 0, .Lcall 1716 1717 ld 8, 24(4) 1718 1719 cmpwi 0, 12, 5 1720 blt 0, .Lcall 1721 1722 ld 9, 32(4) 1723 1724 cmpwi 0, 12, 6 1725 blt 0, .Lcall 1726 1727 ld 10, 40(4) 1728 1729 cmpwi 0, 12, 7 1730 blt 0, .Lcall 1731 1732// There are more than 6 microtask parameters, so we need to store the 1733// remainder to the stack. 1734 addi 12, 12, -6 1735 mtctr 12 1736 1737// These are set to 8 bytes before the first desired store address (we're using 1738// pre-increment loads and stores in the loop below). The parameter save area 1739// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and 1740// 32 + 8*8 == 96 bytes above r1 for ELFv2. 1741 addi 4, 4, 40 1742# if KMP_ARCH_PPC64_ELFv2 1743 addi 12, 1, 88 1744# else 1745 addi 12, 1, 104 1746# endif 1747 1748.Lnext: 1749 ldu 0, 8(4) 1750 stdu 0, 8(12) 1751 bdnz .Lnext 1752 1753.Lcall: 1754# if KMP_ARCH_PPC64_ELFv2 1755 std 2, 24(1) 1756 mr 12, 3 1757#else 1758 std 2, 40(1) 1759// For ELFv1, we need to load the actual function address from the function descriptor. 1760 ld 12, 0(3) 1761 ld 2, 8(3) 1762 ld 11, 16(3) 1763#endif 1764 1765 addi 3, 31, -20 1766 addi 4, 31, -24 1767 1768 mtctr 12 1769 bctrl 1770# if KMP_ARCH_PPC64_ELFv2 1771 ld 2, 24(1) 1772# else 1773 ld 2, 40(1) 1774# endif 1775 1776# if OMPT_SUPPORT 1777 li 3, 0 1778 std 3, 0(30) 1779# endif 1780 1781 li 3, 1 1782 1783# if OMPT_SUPPORT 1784 ld 30, -16(31) 1785# endif 1786 1787 mr 1, 31 1788 ld 0, 16(1) 1789 ld 31, -8(1) 1790 mtlr 0 1791 blr 1792 1793 .long 0 1794 .quad 0 1795.Lfunc_end0: 1796 .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 1797 .cfi_endproc 1798 1799// -- End __kmp_invoke_microtask 1800 1801#endif /* KMP_ARCH_PPC64 */ 1802 1803#if KMP_ARCH_RISCV64 1804 1805//------------------------------------------------------------------------ 1806// 1807// typedef void (*microtask_t)(int *gtid, int *tid, ...); 1808// 1809// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 1810// void *p_argv[] 1811// #if OMPT_SUPPORT 1812// , 1813// void **exit_frame_ptr 1814// #endif 1815// ) { 1816// #if OMPT_SUPPORT 1817// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1818// #endif 1819// 1820// (*pkfn)(>id, &tid, argv[0], ...); 1821// 1822// return 1; 1823// } 1824// 1825// Parameters: 1826// a0: pkfn 1827// a1: gtid 1828// a2: tid 1829// a3: argc 1830// a4: p_argv 1831// a5: exit_frame_ptr 1832// 1833// Locals: 1834// __gtid: gtid param pushed on stack so can pass >id to pkfn 1835// __tid: tid param pushed on stack so can pass &tid to pkfn 1836// 1837// Temp. registers: 1838// 1839// t0: used to calculate the dynamic stack size / used to hold pkfn address 1840// t1: used as temporary for stack placement calculation 1841// t2: used as temporary for stack arguments 1842// t3: used as temporary for number of remaining pkfn parms 1843// t4: used to traverse p_argv array 1844// 1845// return: a0 (always 1/TRUE) 1846// 1847 1848__gtid = -20 1849__tid = -24 1850 1851// -- Begin __kmp_invoke_microtask 1852// mark_begin; 1853 .text 1854 .globl __kmp_invoke_microtask 1855 .p2align 1 1856 .type __kmp_invoke_microtask,@function 1857__kmp_invoke_microtask: 1858 .cfi_startproc 1859 1860 // First, save ra and fp 1861 addi sp, sp, -16 1862 sd ra, 8(sp) 1863 sd fp, 0(sp) 1864 addi fp, sp, 16 1865 .cfi_def_cfa fp, 0 1866 .cfi_offset ra, -8 1867 .cfi_offset fp, -16 1868 1869 // Compute the dynamic stack size: 1870 // 1871 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 1872 // reference 1873 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 1874 // function by register. Given that we have 8 of such registers (a[0-7]) 1875 // and two + 'argc' arguments (consider >id and &tid), we need to 1876 // reserve max(0, argc - 6)*8 extra bytes 1877 // 1878 // The total number of bytes is then max(0, argc - 6)*8 + 8 1879 1880 // Compute max(0, argc - 6) using the following bithack: 1881 // max(0, x) = x - (x & (x >> 31)), where x := argc - 6 1882 // Source: http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax 1883 addi t0, a3, -6 1884 srai t1, t0, 31 1885 and t1, t0, t1 1886 sub t0, t0, t1 1887 1888 addi t0, t0, 1 1889 1890 slli t0, t0, 3 1891 sub sp, sp, t0 1892 1893 // Align the stack to 16 bytes 1894 andi sp, sp, -16 1895 1896 mv t0, a0 1897 mv t3, a3 1898 mv t4, a4 1899 1900#if OMPT_SUPPORT 1901 // Save frame pointer into exit_frame 1902 sd fp, 0(a5) 1903#endif 1904 1905 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) 1906 1907 sw a1, __gtid(fp) 1908 sw a2, __tid(fp) 1909 1910 addi a0, fp, __gtid 1911 addi a1, fp, __tid 1912 1913 beqz t3, .L_kmp_3 1914 ld a2, 0(t4) 1915 1916 addi t3, t3, -1 1917 beqz t3, .L_kmp_3 1918 ld a3, 8(t4) 1919 1920 addi t3, t3, -1 1921 beqz t3, .L_kmp_3 1922 ld a4, 16(t4) 1923 1924 addi t3, t3, -1 1925 beqz t3, .L_kmp_3 1926 ld a5, 24(t4) 1927 1928 addi t3, t3, -1 1929 beqz t3, .L_kmp_3 1930 ld a6, 32(t4) 1931 1932 addi t3, t3, -1 1933 beqz t3, .L_kmp_3 1934 ld a7, 40(t4) 1935 1936 // Prepare any additional argument passed through the stack 1937 addi t4, t4, 48 1938 mv t1, sp 1939 j .L_kmp_2 1940.L_kmp_1: 1941 ld t2, 0(t4) 1942 sd t2, 0(t1) 1943 addi t4, t4, 8 1944 addi t1, t1, 8 1945.L_kmp_2: 1946 addi t3, t3, -1 1947 bnez t3, .L_kmp_1 1948 1949.L_kmp_3: 1950 // Call pkfn function 1951 jalr t0 1952 1953 // Restore stack and return 1954 1955 addi a0, zero, 1 1956 1957 addi sp, fp, -16 1958 ld fp, 0(sp) 1959 ld ra, 8(sp) 1960 addi sp, sp, 16 1961 ret 1962.Lfunc_end0: 1963 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 1964 .cfi_endproc 1965 1966// -- End __kmp_invoke_microtask 1967 1968#endif /* KMP_ARCH_RISCV64 */ 1969 1970#if KMP_ARCH_LOONGARCH64 1971 1972//------------------------------------------------------------------------ 1973// 1974// typedef void (*microtask_t)(int *gtid, int *tid, ...); 1975// 1976// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 1977// void *p_argv[] 1978// #if OMPT_SUPPORT 1979// , 1980// void **exit_frame_ptr 1981// #endif 1982// ) { 1983// #if OMPT_SUPPORT 1984// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 1985// #endif 1986// 1987// (*pkfn)(>id, &tid, argv[0], ...); 1988// 1989// return 1; 1990// } 1991// 1992// Parameters: 1993// a0: pkfn 1994// a1: gtid 1995// a2: tid 1996// a3: argc 1997// a4: p_argv 1998// a5: exit_frame_ptr 1999// 2000// Locals: 2001// __gtid: gtid param pushed on stack so can pass >id to pkfn 2002// __tid: tid param pushed on stack so can pass &tid to pkfn 2003// 2004// Temp registers: 2005// 2006// t0: used to calculate the dynamic stack size / used to hold pkfn address 2007// t1: used as temporary for stack placement calculation 2008// t2: used as temporary for stack arguments 2009// t3: used as temporary for number of remaining pkfn parms 2010// t4: used to traverse p_argv array 2011// 2012// return: a0 (always 1/TRUE) 2013// 2014 2015// -- Begin __kmp_invoke_microtask 2016// mark_begin; 2017 .text 2018 .globl __kmp_invoke_microtask 2019 .p2align 2 2020 .type __kmp_invoke_microtask,@function 2021__kmp_invoke_microtask: 2022 .cfi_startproc 2023 2024 // First, save ra and fp 2025 addi.d $sp, $sp, -16 2026 st.d $ra, $sp, 8 2027 st.d $fp, $sp, 0 2028 addi.d $fp, $sp, 16 2029 .cfi_def_cfa 22, 0 2030 .cfi_offset 1, -8 2031 .cfi_offset 22, -16 2032 2033 // Compute the dynamic stack size: 2034 // 2035 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 2036 // reference 2037 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 2038 // function by register. Given that we have 8 of such registers (a[0-7]) 2039 // and two + 'argc' arguments (consider >id and &tid), we need to 2040 // reserve max(0, argc - 6)*8 extra bytes 2041 // 2042 // The total number of bytes is then max(0, argc - 6)*8 + 8 2043 2044 addi.d $t0, $a3, -6 2045 slt $t1, $t0, $zero 2046 masknez $t0, $t0, $t1 2047 addi.d $t0, $t0, 1 2048 slli.d $t0, $t0, 3 2049 sub.d $sp, $sp, $t0 2050 2051 // Align the stack to 16 bytes 2052 bstrins.d $sp, $zero, 3, 0 2053 2054 move $t0, $a0 2055 move $t3, $a3 2056 move $t4, $a4 2057 2058#if OMPT_SUPPORT 2059 // Save frame pointer into exit_frame 2060 st.d $fp, $a5, 0 2061#endif 2062 2063 // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) 2064 2065 st.w $a1, $fp, -20 2066 st.w $a2, $fp, -24 2067 2068 addi.d $a0, $fp, -20 2069 addi.d $a1, $fp, -24 2070 2071 beqz $t3, .L_kmp_3 2072 ld.d $a2, $t4, 0 2073 2074 addi.d $t3, $t3, -1 2075 beqz $t3, .L_kmp_3 2076 ld.d $a3, $t4, 8 2077 2078 addi.d $t3, $t3, -1 2079 beqz $t3, .L_kmp_3 2080 ld.d $a4, $t4, 16 2081 2082 addi.d $t3, $t3, -1 2083 beqz $t3, .L_kmp_3 2084 ld.d $a5, $t4, 24 2085 2086 addi.d $t3, $t3, -1 2087 beqz $t3, .L_kmp_3 2088 ld.d $a6, $t4, 32 2089 2090 addi.d $t3, $t3, -1 2091 beqz $t3, .L_kmp_3 2092 ld.d $a7, $t4, 40 2093 2094 // Prepare any additional argument passed through the stack 2095 addi.d $t4, $t4, 48 2096 move $t1, $sp 2097 b .L_kmp_2 2098.L_kmp_1: 2099 ld.d $t2, $t4, 0 2100 st.d $t2, $t1, 0 2101 addi.d $t4, $t4, 8 2102 addi.d $t1, $t1, 8 2103.L_kmp_2: 2104 addi.d $t3, $t3, -1 2105 bnez $t3, .L_kmp_1 2106 2107.L_kmp_3: 2108 // Call pkfn function 2109 jirl $ra, $t0, 0 2110 2111 // Restore stack and return 2112 2113 addi.d $a0, $zero, 1 2114 2115 addi.d $sp, $fp, -16 2116 ld.d $fp, $sp, 0 2117 ld.d $ra, $sp, 8 2118 addi.d $sp, $sp, 16 2119 jr $ra 2120.Lfunc_end0: 2121 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2122 .cfi_endproc 2123 2124// -- End __kmp_invoke_microtask 2125 2126#endif /* KMP_ARCH_LOONGARCH64 */ 2127 2128#if KMP_ARCH_VE 2129 2130//------------------------------------------------------------------------ 2131// 2132// typedef void (*microtask_t)(int *gtid, int *tid, ...); 2133// 2134// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 2135// void *p_argv[] 2136// #if OMPT_SUPPORT 2137// , 2138// void **exit_frame_ptr 2139// #endif 2140// ) { 2141// #if OMPT_SUPPORT 2142// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 2143// #endif 2144// 2145// (*pkfn)(>id, &tid, argv[0], ...); 2146// 2147// return 1; 2148// } 2149// 2150// Parameters: 2151// s0: pkfn 2152// s1: gtid 2153// s2: tid 2154// s3: argc 2155// s4: p_argv 2156// s5: exit_frame_ptr 2157// 2158// Locals: 2159// __gtid: gtid param pushed on stack so can pass >id to pkfn 2160// __tid: tid param pushed on stack so can pass &tid to pkfn 2161// 2162// Temp. registers: 2163// 2164// s34: used to calculate the dynamic stack size 2165// s35: used as temporary for stack placement calculation 2166// s36: used as temporary for stack arguments 2167// s37: used as temporary for number of remaining pkfn parms 2168// s38: used to traverse p_argv array 2169// 2170// return: s0 (always 1/TRUE) 2171// 2172 2173__gtid = -4 2174__tid = -8 2175 2176// -- Begin __kmp_invoke_microtask 2177// mark_begin; 2178 .text 2179 .globl __kmp_invoke_microtask 2180 // A function requires 8 bytes align. 2181 .p2align 3 2182 .type __kmp_invoke_microtask,@function 2183__kmp_invoke_microtask: 2184 .cfi_startproc 2185 2186 // First, save fp and lr. VE stores them at caller stack frame. 2187 st %fp, 0(, %sp) 2188 st %lr, 8(, %sp) 2189 or %fp, 0, %sp 2190 .cfi_def_cfa %fp, 0 2191 .cfi_offset %lr, 8 2192 .cfi_offset %fp, 0 2193 2194 // Compute the dynamic stack size: 2195 // 2196 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them 2197 // by reference 2198 // - We need 8 bytes for whole arguments. We have two + 'argc' 2199 // arguments (condider >id and &tid). We need to reserve 2200 // (argc + 2) * 8 bytes. 2201 // - We need 176 bytes for RSA and others 2202 // 2203 // The total number of bytes is then (argc + 2) * 8 + 8 + 176. 2204 // 2205 // |------------------------------| 2206 // | return address of callee | 8(%fp) 2207 // |------------------------------| 2208 // | frame pointer of callee | 0(%fp) 2209 // |------------------------------| <------------------ %fp 2210 // | __tid / __gtid | -8(%fp) / -4(%fp) 2211 // |------------------------------| 2212 // | argc+2 for arguments | 176(%sp) 2213 // |------------------------------| 2214 // | RSA | 2215 // |------------------------------| 2216 // | return address | 2217 // |------------------------------| 2218 // | frame pointer | 2219 // |------------------------------| <------------------ %sp 2220 2221 adds.w.sx %s34, 2, %s3 2222 sll %s34, %s34, 3 2223 lea %s34, 184(, %s34) 2224 subs.l %sp, %sp, %s34 2225 2226 // Align the stack to 16 bytes. 2227 and %sp, -16, %sp 2228 2229 // Save pkfn. 2230 or %s12, 0, %s0 2231 2232 // Call host to allocate stack if it is necessary. 2233 brge.l %sp, %sl, .L_kmp_pass 2234 ld %s61, 24(, %tp) 2235 lea %s63, 0x13b 2236 shm.l %s63, 0(%s61) 2237 shm.l %sl, 8(%s61) 2238 shm.l %sp, 16(%s61) 2239 monc 2240 2241.L_kmp_pass: 2242 lea %s35, 176(, %sp) 2243 adds.w.sx %s37, 0, %s3 2244 or %s38, 0, %s4 2245 2246#if OMPT_SUPPORT 2247 // Save frame pointer into exit_frame. 2248 st %fp, 0(%s5) 2249#endif 2250 2251 // Prepare arguments for the pkfn function (first 8 using s0-s7 2252 // registers, but need to store stack also because of varargs). 2253 2254 stl %s1, __gtid(%fp) 2255 stl %s2, __tid(%fp) 2256 2257 adds.l %s0, __gtid, %fp 2258 st %s0, 0(, %s35) 2259 adds.l %s1, __tid, %fp 2260 st %s1, 8(, %s35) 2261 2262 breq.l 0, %s37, .L_kmp_call 2263 ld %s2, 0(, %s38) 2264 st %s2, 16(, %s35) 2265 2266 breq.l 1, %s37, .L_kmp_call 2267 ld %s3, 8(, %s38) 2268 st %s3, 24(, %s35) 2269 2270 breq.l 2, %s37, .L_kmp_call 2271 ld %s4, 16(, %s38) 2272 st %s4, 32(, %s35) 2273 2274 breq.l 3, %s37, .L_kmp_call 2275 ld %s5, 24(, %s38) 2276 st %s5, 40(, %s35) 2277 2278 breq.l 4, %s37, .L_kmp_call 2279 ld %s6, 32(, %s38) 2280 st %s6, 48(, %s35) 2281 2282 breq.l 5, %s37, .L_kmp_call 2283 ld %s7, 40(, %s38) 2284 st %s7, 56(, %s35) 2285 2286 breq.l 6, %s37, .L_kmp_call 2287 2288 // Prepare any additional argument passed through the stack. 2289 adds.l %s37, -6, %s37 2290 lea %s38, 48(, %s38) 2291 lea %s35, 64(, %s35) 2292.L_kmp_loop: 2293 ld %s36, 0(, %s38) 2294 st %s36, 0(, %s35) 2295 adds.l %s37, -1, %s37 2296 adds.l %s38, 8, %s38 2297 adds.l %s35, 8, %s35 2298 brne.l 0, %s37, .L_kmp_loop 2299 2300.L_kmp_call: 2301 // Call pkfn function. 2302 bsic %lr, (, %s12) 2303 2304 // Return value. 2305 lea %s0, 1 2306 2307 // Restore stack and return. 2308 or %sp, 0, %fp 2309 ld %lr, 8(, %sp) 2310 ld %fp, 0(, %sp) 2311 b.l.t (, %lr) 2312.Lfunc_end0: 2313 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2314 .cfi_endproc 2315 2316// -- End __kmp_invoke_microtask 2317 2318#endif /* KMP_ARCH_VE */ 2319 2320#if KMP_ARCH_S390X 2321 2322//------------------------------------------------------------------------ 2323// 2324// typedef void (*microtask_t)(int *gtid, int *tid, ...); 2325// 2326// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, 2327// void *p_argv[] 2328// #if OMPT_SUPPORT 2329// , 2330// void **exit_frame_ptr 2331// #endif 2332// ) { 2333// #if OMPT_SUPPORT 2334// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); 2335// #endif 2336// 2337// (*pkfn)(>id, &tid, argv[0], ...); 2338// 2339// return 1; 2340// } 2341// 2342// Parameters: 2343// r2: pkfn 2344// r3: gtid 2345// r4: tid 2346// r5: argc 2347// r6: p_argv 2348// SP+160: exit_frame_ptr 2349// 2350// Locals: 2351// __gtid: gtid param pushed on stack so can pass >id to pkfn 2352// __tid: tid param pushed on stack so can pass &tid to pkfn 2353// 2354// Temp. registers: 2355// 2356// r0: used to fetch argv slots 2357// r7: used as temporary for number of remaining pkfn parms 2358// r8: argv 2359// r9: pkfn 2360// r10: stack size 2361// r11: previous fp 2362// r12: stack parameter area 2363// r13: argv slot 2364// 2365// return: r2 (always 1/TRUE) 2366// 2367 2368// -- Begin __kmp_invoke_microtask 2369// mark_begin; 2370 .text 2371 .globl __kmp_invoke_microtask 2372 .p2align 1 2373 .type __kmp_invoke_microtask,@function 2374__kmp_invoke_microtask: 2375 .cfi_startproc 2376 2377 stmg %r6,%r14,48(%r15) 2378 .cfi_offset %r6, -112 2379 .cfi_offset %r7, -104 2380 .cfi_offset %r8, -96 2381 .cfi_offset %r9, -88 2382 .cfi_offset %r10, -80 2383 .cfi_offset %r11, -72 2384 .cfi_offset %r12, -64 2385 .cfi_offset %r13, -56 2386 .cfi_offset %r14, -48 2387 .cfi_offset %r15, -40 2388 lgr %r11,%r15 2389 .cfi_def_cfa %r11, 160 2390 2391 // Compute the dynamic stack size: 2392 // 2393 // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by 2394 // reference 2395 // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' 2396 // function by register. Given that we have 5 of such registers (r[2-6]) 2397 // and two + 'argc' arguments (consider >id and &tid), we need to 2398 // reserve max(0, argc - 3)*8 extra bytes 2399 // 2400 // The total number of bytes is then max(0, argc - 3)*8 + 8 2401 2402 lgr %r10,%r5 2403 aghi %r10,-2 2404 jnm 0f 2405 lghi %r10,0 24060: 2407 sllg %r10,%r10,3 2408 lgr %r12,%r10 2409 aghi %r10,176 2410 sgr %r15,%r10 2411 agr %r12,%r15 2412 stg %r11,0(%r15) 2413 2414 lgr %r9,%r2 // pkfn 2415 2416#if OMPT_SUPPORT 2417 // Save frame pointer into exit_frame 2418 lg %r8,160(%r11) 2419 stg %r11,0(%r8) 2420#endif 2421 2422 // Prepare arguments for the pkfn function (first 5 using r2-r6 registers) 2423 2424 stg %r3,160(%r12) 2425 la %r2,164(%r12) // gid 2426 stg %r4,168(%r12) 2427 la %r3,172(%r12) // tid 2428 lgr %r8,%r6 // argv 2429 2430 // If argc > 0 2431 ltgr %r7,%r5 2432 jz 1f 2433 2434 lg %r4,0(%r8) // argv[0] 2435 aghi %r7,-1 2436 jz 1f 2437 2438 // If argc > 1 2439 lg %r5,8(%r8) // argv[1] 2440 aghi %r7,-1 2441 jz 1f 2442 2443 // If argc > 2 2444 lg %r6,16(%r8) // argv[2] 2445 aghi %r7,-1 2446 jz 1f 2447 2448 lghi %r13,0 // Index [n] 24492: 2450 lg %r0,24(%r13,%r8) // argv[2+n] 2451 stg %r0,160(%r13,%r15) // parm[2+n] 2452 aghi %r13,8 // Next 2453 aghi %r7,-1 2454 jnz 2b 2455 24561: 2457 basr %r14,%r9 // Call pkfn 2458 2459 // Restore stack and return 2460 2461 lgr %r15,%r11 2462 lmg %r6,%r14,48(%r15) 2463 lghi %r2,1 2464 br %r14 2465.Lfunc_end0: 2466 .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask 2467 .cfi_endproc 2468 2469// -- End __kmp_invoke_microtask 2470 2471#endif /* KMP_ARCH_S390X */ 2472 2473#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 2474#ifndef KMP_PREFIX_UNDERSCORE 2475# define KMP_PREFIX_UNDERSCORE(x) x 2476#endif 2477 .data 2478 COMMON .gomp_critical_user_, 32, 3 2479 .data 2480 .align 4 2481 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) 2482KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): 2483 .4byte .gomp_critical_user_ 2484#ifdef __ELF__ 2485 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 2486#endif 2487#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */ 2488 2489#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ 2490 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \ 2491 KMP_ARCH_S390X 2492#ifndef KMP_PREFIX_UNDERSCORE 2493# define KMP_PREFIX_UNDERSCORE(x) x 2494#endif 2495 .data 2496 COMMON .gomp_critical_user_, 32, 3 2497 .data 2498 .align 8 2499 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) 2500KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): 2501 .8byte .gomp_critical_user_ 2502#ifdef __ELF__ 2503 .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 2504#endif 2505#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || 2506 KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || 2507 KMP_ARCH_S390X */ 2508 2509#if KMP_OS_LINUX 2510# if KMP_ARCH_ARM || KMP_ARCH_AARCH64 2511.section .note.GNU-stack,"",%progbits 2512# elif !KMP_ARCH_WASM 2513.section .note.GNU-stack,"",@progbits 2514# endif 2515#endif 2516 2517#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) 2518GNU_PROPERTY_BTI_PAC 2519#endif 2520