xref: /llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll (revision f2eeb3dc7b438e4216ac6b970129b607d6de31f9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s
626dc2844SDiana Picus
; An amdgpu_cs_chain_preserve function whose body is just `ret void`.
; All four run configurations expect only the entry s_waitcnt followed by
; s_endpgm; no other instructions are expected.
define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    s_endpgm
  ret void
}
297f5d59b3SDiana
; An amdgpu_cs function that clobbers v0, v8, v16 and s0 in inline asm and
; then hands control to @chain_preserve_callee via llvm.amdgcn.cs.chain
; (EXEC operand -1, flags operand 0).
; Expected output: the incoming v0..v2 end up in v8..v10, s0 is parked in s3
; across the asm and put back afterwards, exec_lo is set to -1 and the
; transfer is an s_setpc_b64. The gfx1030 expectations additionally copy
; s[100:103] into s[48:51] before the jump.
define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain_preserve:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain_preserve:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT:    s_mov_b32 s100, s0
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
; GISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain_preserve:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain_preserve:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT:    s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, v0
; DAGISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v1
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v2
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT:    s_add_u32 s100, s100, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
1147f5d59b3SDiana
; An amdgpu_cs_chain function that clobbers v0, v8, v16 and s0 in inline asm
; and then chains to @chain_preserve_callee (EXEC operand -1, flags 0).
; Expected output: v8 is parked in v1 and s0 in s3 across the asm and both
; are put back before the s_setpc_b64. No store or reload of the clobbered
; v16 is expected in any configuration.
define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_preserve:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_preserve:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_preserve:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_preserve:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
1797f5d59b3SDiana
; An amdgpu_cs_chain_preserve function that clobbers v0, v8, v16 and s0 in
; inline asm and then chains to @chain_preserve_callee (EXEC operand -1,
; flags 0).
; Expected output: v16 is stored to scratch before the asm and reloaded after
; it; v8 is parked in v1 and s0 in s3 across the asm and put back before the
; s_setpc_b64.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
2527f5d59b3SDiana
; An amdgpu_cs_chain_preserve function that clobbers v0, v8, v16 and s0 in
; inline asm and then chains to @chain_callee (EXEC operand -1, flags 0).
; Expected output: v16 is stored to scratch before the asm and reloaded after
; it; v8 is parked in v1 and s0 in s3 across the asm and put back before the
; s_setpc_b64 to chain_callee.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
3257f5d59b3SDiana
; An amdgpu_cs_chain_preserve function that builds a value with
; llvm.amdgcn.set.inactive(3, 4), clobbers v0, v8, v16 and s0 in inline asm,
; passes the value through llvm.amdgcn.wwm, inserts it as element 0 of %b and
; chains to @chain_callee (EXEC operand -1, flags 0).
; Expected output: v16 is stored to scratch before and reloaded after the asm;
; the set.inactive value is produced by a v_cndmask_b32 between an
; s_or_saveexec_b32 and the matching exec_lo restore, held in v1 across the
; asm, and then moved through v2 into v8 before the s_setpc_b64.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_wwm:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_wwm:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    s_or_saveexec_b32 s0, -1
; GISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_wwm:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
; DAGISEL-GFX11-NEXT:    s_or_saveexec_b32 s4, -1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s4
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_wwm:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s4
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  %w = call i32 @llvm.amdgcn.wwm(i32 %i)
  %c = insertelement <3 x i32> %b, i32 %w, i32 0
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
  unreachable
}
4161fa58c77SDiana
; An amdgpu_cs_chain_preserve function whose inline asm clobbers all of
; v0..v8 plus v16 and s0 before chaining to @chain_callee (EXEC operand -1,
; flags 0).
; Expected output: both v11 and v16 are stored to scratch on entry; v11 then
; holds the incoming v8 across the asm and is copied back into v8 afterwards;
; v16 and v11 are reloaded from scratch before the s_setpc_b64. s0 is parked
; in s3 across the asm.
define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    scratch_store_b32 off, v11, off offset:4
; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off
; GISEL-GFX11-NEXT:    scratch_load_b32 v11, off, off offset:4
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0
; GISEL-GFX10-NEXT:    buffer_load_dword v11, off, s[48:51], 0 offset:4
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    s_clause 0x1
; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v11, off offset:4
; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
; DAGISEL-GFX11-NEXT:    s_clause 0x1
; DAGISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off
; DAGISEL-GFX11-NEXT:    scratch_load_b32 v11, off, off offset:4
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
; DAGISEL-GFX10-NEXT:    s_clause 0x1
; DAGISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0
; DAGISEL-GFX10-NEXT:    buffer_load_dword v11, off, s[48:51], 0 offset:4
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}
5037f5d59b3SDiana
; Chains from an amdgpu_cs_chain_preserve function to the
; amdgpu_cs_chain_preserve callee @chain_preserve_callee_2 while forwarding
; fewer values than were received: only elements 0 and 1 of each <3 x i32>
; parameter are passed on as <2 x i32>.
; The inline asm lists v0, v8, v16 and s0 as clobbered. In the expected output
; for all four configurations v16 is stored to scratch before the s_nop and
; reloaded after it, v8 and s0 are copied to other registers and copied back,
; and control is transferred with s_setpc_b64 after exec_lo is set to -1.
; GFX11 uses scratch_store/scratch_load for the v16 save; GFX10 uses
; buffer_store/buffer_load through s[48:51].
5047f5d59b3SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
5057f5d59b3SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args:
5067f5d59b3SDiana; GISEL-GFX11:       ; %bb.0:
5077f5d59b3SDiana; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508bc6955f1SDiana Picus; GISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
5097f5d59b3SDiana; GISEL-GFX11-NEXT:    s_mov_b32 s2, s0
5101fa58c77SDiana; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
5117f5d59b3SDiana; GISEL-GFX11-NEXT:    ;;#ASMSTART
5127f5d59b3SDiana; GISEL-GFX11-NEXT:    s_nop
5137f5d59b3SDiana; GISEL-GFX11-NEXT:    ;;#ASMEND
514bc6955f1SDiana Picus; GISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
5157f5d59b3SDiana; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
5167f5d59b3SDiana; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
5177f5d59b3SDiana; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
5187f5d59b3SDiana; GISEL-GFX11-NEXT:    s_mov_b32 s0, s2
5197f5d59b3SDiana; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
5207f5d59b3SDiana; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
5217f5d59b3SDiana;
5227f5d59b3SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args:
5237f5d59b3SDiana; GISEL-GFX10:       ; %bb.0:
5247f5d59b3SDiana; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525bc6955f1SDiana Picus; GISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
5267f5d59b3SDiana; GISEL-GFX10-NEXT:    s_mov_b32 s2, s0
5271fa58c77SDiana; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
5287f5d59b3SDiana; GISEL-GFX10-NEXT:    ;;#ASMSTART
5297f5d59b3SDiana; GISEL-GFX10-NEXT:    s_nop
5307f5d59b3SDiana; GISEL-GFX10-NEXT:    ;;#ASMEND
531bc6955f1SDiana Picus; GISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
5327f5d59b3SDiana; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
5337f5d59b3SDiana; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
5347f5d59b3SDiana; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
5357f5d59b3SDiana; GISEL-GFX10-NEXT:    s_mov_b32 s0, s2
5367f5d59b3SDiana; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
5377f5d59b3SDiana; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
5387f5d59b3SDiana;
5397f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args:
5407f5d59b3SDiana; DAGISEL-GFX11:       ; %bb.0:
5417f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT:    scratch_store_b32 off, v16, off ; 4-byte Folded Spill
5437f5d59b3SDiana; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
5447f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_mov_b32 s2, s0
5457f5d59b3SDiana; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
5467f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_nop
5477f5d59b3SDiana; DAGISEL-GFX11-NEXT:    ;;#ASMEND
548bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT:    scratch_load_b32 v16, off, off ; 4-byte Folded Reload
54939830feaSDiana Picus; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
550eb3c02fdSDiana; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
5511fa58c77SDiana; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
5527f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s2
5537f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
5547f5d59b3SDiana; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
5557f5d59b3SDiana;
5567f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args:
5577f5d59b3SDiana; DAGISEL-GFX10:       ; %bb.0:
5587f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT:    buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill
5607f5d59b3SDiana; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
5617f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_mov_b32 s2, s0
5627f5d59b3SDiana; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
5637f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_nop
5647f5d59b3SDiana; DAGISEL-GFX10-NEXT:    ;;#ASMEND
565bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT:    buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload
56639830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_preserve_callee_2@abs32@hi
567eb3c02fdSDiana; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
5681fa58c77SDiana; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_preserve_callee_2@abs32@lo
5697f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s2
5707f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
5717f5d59b3SDiana; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
; Narrow each incoming <3 x i32> to its first two elements.
5727f5d59b3SDiana  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
5737f5d59b3SDiana  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
; The clobbered registers here are the ones saved and restored around the
; s_nop in the expected output above.
5747f5d59b3SDiana  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
; Chain to the two-element callee; the i32 -1 operand lines up with the
; exec_lo write in the expected output (NOTE(review): confirm operand meaning
; against the intrinsic documentation).
5757f5d59b3SDiana  call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_preserve_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
5767f5d59b3SDiana  unreachable
5777f5d59b3SDiana}
5787f5d59b3SDiana
5797f5d59b3SDiana; Note that amdgpu_cs_chain_preserve functions are not allowed to call
5807f5d59b3SDiana; llvm.amdgcn.cs.chain with more vgpr args than they received as parameters.
5817f5d59b3SDiana
; Performs a volatile store of <1, 2, 3, 4> into element %idx of an
; addrspace(5) alloca of eight <4 x i32> values that requests 32-byte
; alignment, then returns.
; As the function name indicates, the property under test is that the stack is
; not realigned for the over-aligned alloca: the expected output for all four
; configurations contains no stack-pointer adjustment, and the store address is
; formed directly from v8 shifted left by 4 (16 bytes per <4 x i32> element).
; GFX11 emits a single scratch_store_b128; GFX10 emits four buffer_store_dword
; instructions, each followed by a wait.
58239830feaSDiana Picusdefine amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_dont_realign_stack(i32 %idx) {
58339830feaSDiana Picus; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack:
58439830feaSDiana Picus; GISEL-GFX11:       ; %bb.0:
58539830feaSDiana Picus; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58639830feaSDiana Picus; GISEL-GFX11-NEXT:    s_mov_b32 s3, 4
58739830feaSDiana Picus; GISEL-GFX11-NEXT:    s_mov_b32 s2, 3
58839830feaSDiana Picus; GISEL-GFX11-NEXT:    s_mov_b32 s1, 2
58939830feaSDiana Picus; GISEL-GFX11-NEXT:    s_mov_b32 s0, 1
59039830feaSDiana Picus; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
591eb3c02fdSDiana; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
592*f2eeb3dcSMatt Arsenault; GISEL-GFX11-NEXT:    v_mov_b32_e32 v4, v0
59339830feaSDiana Picus; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
59439830feaSDiana Picus; GISEL-GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
59539830feaSDiana Picus; GISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
59639830feaSDiana Picus; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
59739830feaSDiana Picus; GISEL-GFX11-NEXT:    s_endpgm
59839830feaSDiana Picus;
59939830feaSDiana Picus; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack:
60039830feaSDiana Picus; GISEL-GFX10:       ; %bb.0:
60139830feaSDiana Picus; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60239830feaSDiana Picus; GISEL-GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
60339830feaSDiana Picus; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 1
604eb3c02fdSDiana; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 2
60539830feaSDiana Picus; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 3
60639830feaSDiana Picus; GISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 4
60739830feaSDiana Picus; GISEL-GFX10-NEXT:    buffer_store_dword v1, v0, s[48:51], 0 offen
60839830feaSDiana Picus; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
60939830feaSDiana Picus; GISEL-GFX10-NEXT:    buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
61039830feaSDiana Picus; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
61139830feaSDiana Picus; GISEL-GFX10-NEXT:    buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
61239830feaSDiana Picus; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
61339830feaSDiana Picus; GISEL-GFX10-NEXT:    buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
61439830feaSDiana Picus; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
61539830feaSDiana Picus; GISEL-GFX10-NEXT:    s_endpgm
61639830feaSDiana Picus;
61739830feaSDiana Picus; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack:
61839830feaSDiana Picus; DAGISEL-GFX11:       ; %bb.0:
61939830feaSDiana Picus; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62039830feaSDiana Picus; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
62139830feaSDiana Picus; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
622bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT:    v_lshl_add_u32 v4, v8, 4, 0
62339830feaSDiana Picus; DAGISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
62439830feaSDiana Picus; DAGISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
62539830feaSDiana Picus; DAGISEL-GFX11-NEXT:    s_endpgm
62639830feaSDiana Picus;
62739830feaSDiana Picus; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack:
62839830feaSDiana Picus; DAGISEL-GFX10:       ; %bb.0:
62939830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63039830feaSDiana Picus; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 4
631bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT:    v_lshl_add_u32 v1, v8, 4, 0
632eb3c02fdSDiana; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 3
63339830feaSDiana Picus; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 2
63439830feaSDiana Picus; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 1
63539830feaSDiana Picus; DAGISEL-GFX10-NEXT:    buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
63639830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
63739830feaSDiana Picus; DAGISEL-GFX10-NEXT:    buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
63839830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
63939830feaSDiana Picus; DAGISEL-GFX10-NEXT:    buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
64039830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
64139830feaSDiana Picus; DAGISEL-GFX10-NEXT:    buffer_store_dword v4, v1, s[48:51], 0 offen
64239830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
64339830feaSDiana Picus; DAGISEL-GFX10-NEXT:    s_endpgm
; The alloca asks for 32-byte alignment; see the header comment for what the
; expected output shows about the stack.
64439830feaSDiana Picus  %alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
; Runtime index selects which <4 x i32> slot receives the store.
64539830feaSDiana Picus  %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
; The store is volatile, and appears in the expected output for every
; configuration.
64639830feaSDiana Picus  store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
64739830feaSDiana Picus  ret void
64839830feaSDiana Picus}
64939830feaSDiana Picus
; Three-element overload of the chain intrinsic and the external callees that
; take <3 x i32> arguments. @chain_callee is the jump target of the
; <3 x i32> chain call earlier in the file.
6507f5d59b3SDianadeclare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
6517f5d59b3SDianadeclare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>)
6527f5d59b3SDianadeclare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
6537f5d59b3SDiana
; Two-element overload and its callee, used by
; @chain_preserve_to_chain_preserve_fewer_args.
6547f5d59b3SDianadeclare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
6557f5d59b3SDianadeclare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>)
6561fa58c77SDiana
; NOTE(review): these two intrinsics are not referenced by the functions
; immediately above; presumably they are used elsewhere in the file - verify
; before removing.
6571fa58c77SDianadeclare i32 @llvm.amdgcn.set.inactive(i32, i32)
6581fa58c77SDianadeclare i32 @llvm.amdgcn.wwm(i32)
659