126dc2844SDiana Picus; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 27f5d59b3SDiana; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s 37f5d59b3SDiana; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s 47f5d59b3SDiana; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s 57f5d59b3SDiana; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s 626dc2844SDiana Picus 726dc2844SDiana Picusdefine amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) { 826dc2844SDiana Picus; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack: 926dc2844SDiana Picus; GISEL-GFX11: ; %bb.0: 1026dc2844SDiana Picus; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1126dc2844SDiana Picus; GISEL-GFX11-NEXT: s_endpgm 1226dc2844SDiana Picus; 1326dc2844SDiana Picus; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack: 1426dc2844SDiana Picus; GISEL-GFX10: ; %bb.0: 1526dc2844SDiana Picus; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1626dc2844SDiana Picus; GISEL-GFX10-NEXT: s_endpgm 1726dc2844SDiana Picus; 1826dc2844SDiana Picus; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack: 1926dc2844SDiana Picus; DAGISEL-GFX11: ; %bb.0: 2026dc2844SDiana Picus; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2126dc2844SDiana Picus; DAGISEL-GFX11-NEXT: s_endpgm 2226dc2844SDiana Picus; 2326dc2844SDiana Picus; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack: 2426dc2844SDiana Picus; DAGISEL-GFX10: ; %bb.0: 2526dc2844SDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2626dc2844SDiana Picus; DAGISEL-GFX10-NEXT: s_endpgm 2726dc2844SDiana Picus ret void 2826dc2844SDiana Picus} 297f5d59b3SDiana 307f5d59b3SDianadefine amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { 317f5d59b3SDiana; GISEL-GFX11-LABEL: cs_to_chain_preserve: 327f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 337f5d59b3SDiana; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 347f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 357f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 367f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 377f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 387f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 397f5d59b3SDiana; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 407f5d59b3SDiana; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 417f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 427f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 437f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 447f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 457f5d59b3SDiana; 467f5d59b3SDiana; GISEL-GFX10-LABEL: cs_to_chain_preserve: 477f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 487f5d59b3SDiana; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] 497f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 507f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 517f5d59b3SDiana; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 527f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 537f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 547f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 557f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 567f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 577f5d59b3SDiana; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 587f5d59b3SDiana; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 597f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 607f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 617f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 627f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 637f5d59b3SDiana; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 647f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 657f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] 667f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 677f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] 687f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 697f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 707f5d59b3SDiana; 717f5d59b3SDiana; DAGISEL-GFX11-LABEL: cs_to_chain_preserve: 727f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 737f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 747f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 757f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 767f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 777f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 787f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 797f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 807f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 817f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 827f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 837f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 847f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 857f5d59b3SDiana; 867f5d59b3SDiana; DAGISEL-GFX10-LABEL: cs_to_chain_preserve: 877f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 887f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] 897f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 907f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 917f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 927f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 937f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 947f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 957f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 967f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 977f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 987f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 997f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 1007f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 1017f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 1027f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 1037f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 1047f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 1057f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] 1067f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] 1077f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 1087f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 1097f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 1107f5d59b3SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 1117f5d59b3SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 1127f5d59b3SDiana unreachable 1137f5d59b3SDiana} 1147f5d59b3SDiana 1157f5d59b3SDianadefine amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { 1167f5d59b3SDiana; GISEL-GFX11-LABEL: chain_to_chain_preserve: 1177f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 1187f5d59b3SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1197f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 1207f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 1217f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 1227f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 1237f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 1247f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 1257f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 1267f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 1277f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 1287f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 1297f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 1307f5d59b3SDiana; 1317f5d59b3SDiana; GISEL-GFX10-LABEL: chain_to_chain_preserve: 1327f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 1337f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1347f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 1357f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 1367f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 1377f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 1387f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 1397f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 1407f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 1417f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 1427f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 1437f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 1447f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 1457f5d59b3SDiana; 1467f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_to_chain_preserve: 1477f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 1487f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1497f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 1507f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 1517f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 1527f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 1537f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 1547f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 1557f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 1567f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 1577f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 1587f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 1597f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 1607f5d59b3SDiana; 1617f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_to_chain_preserve: 1627f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 1637f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1647f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 1657f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 1667f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 1677f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 1687f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 1697f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 1707f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 1717f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 1727f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 1737f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 1747f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 1757f5d59b3SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 1767f5d59b3SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 1777f5d59b3SDiana unreachable 1787f5d59b3SDiana} 1797f5d59b3SDiana 1807f5d59b3SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { 1817f5d59b3SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: 1827f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 1837f5d59b3SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 184bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 1857f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 1861fa58c77SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 1877f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 1887f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 1897f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 190bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 1917f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 1927f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 1937f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 1947f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 1957f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 1967f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 1977f5d59b3SDiana; 1987f5d59b3SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: 1997f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 2007f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 201bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 2027f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 2031fa58c77SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 2047f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 2057f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 2067f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 207bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 2087f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 2097f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 2107f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 2117f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 2127f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 2137f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 2147f5d59b3SDiana; 2157f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: 2167f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 2177f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 218bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 2197f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 2207f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 2217f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 2227f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 2237f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 224bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 22539830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 226eb3c02fdSDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 2271fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 2287f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 2297f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 2307f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 2317f5d59b3SDiana; 2327f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: 2337f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 2347f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 235bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 2367f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 2377f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 2387f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 2397f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 2407f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 241bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 24239830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi 243eb3c02fdSDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 2441fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo 2457f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 2467f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 2477f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 2487f5d59b3SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 2497f5d59b3SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 2507f5d59b3SDiana unreachable 2517f5d59b3SDiana} 2527f5d59b3SDiana 2537f5d59b3SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { 2547f5d59b3SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain: 2557f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 2567f5d59b3SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 257bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 2587f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 2591fa58c77SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 2607f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 2617f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 2627f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 263bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 2647f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 2657f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 2667f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 2677f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 2687f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 2697f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 2707f5d59b3SDiana; 2717f5d59b3SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain: 2727f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 2737f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 274bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 2757f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 2761fa58c77SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 2777f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 2787f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 2797f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 280bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 2817f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 2827f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 2837f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 2847f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 2857f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 2867f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 2877f5d59b3SDiana; 2887f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain: 2897f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 2907f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 291bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 2927f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 2937f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 2947f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 2957f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 2967f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 297bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 29839830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 299eb3c02fdSDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 3001fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 3017f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 3027f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 3037f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 3047f5d59b3SDiana; 3057f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain: 3067f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 3077f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 308bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 3097f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 3107f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 3117f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 3127f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 3137f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 314bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 31539830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 316eb3c02fdSDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 3171fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 3187f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 3197f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 3207f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 3217f5d59b3SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 3227f5d59b3SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 3237f5d59b3SDiana unreachable 3247f5d59b3SDiana} 3257f5d59b3SDiana 3261fa58c77SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) { 3271fa58c77SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain_wwm: 3281fa58c77SDiana; GISEL-GFX11: ; %bb.0: 3291fa58c77SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 330bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 3311fa58c77SDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 33216cda01dSCarl Ritson; GISEL-GFX11-NEXT: s_or_saveexec_b32 s0, -1 33316cda01dSCarl Ritson; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 33416cda01dSCarl Ritson; GISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0 33516cda01dSCarl Ritson; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, s0 3361fa58c77SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 3371fa58c77SDiana; GISEL-GFX11-NEXT: s_nop 3381fa58c77SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 339bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 3401fa58c77SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1 3411fa58c77SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 3421fa58c77SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 3431fa58c77SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 3441fa58c77SDiana; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3451fa58c77SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2 3461fa58c77SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 3471fa58c77SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 3481fa58c77SDiana; 3491fa58c77SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain_wwm: 3501fa58c77SDiana; GISEL-GFX10: ; %bb.0: 3511fa58c77SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 352bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 3531fa58c77SDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 35416cda01dSCarl Ritson; GISEL-GFX10-NEXT: s_or_saveexec_b32 s0, -1 35516cda01dSCarl Ritson; GISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0 35616cda01dSCarl Ritson; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, s0 3571fa58c77SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 3581fa58c77SDiana; GISEL-GFX10-NEXT: s_nop 3591fa58c77SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 360bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 3611fa58c77SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1 3621fa58c77SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 3631fa58c77SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 3641fa58c77SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 3651fa58c77SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2 3661fa58c77SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 3671fa58c77SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 3681fa58c77SDiana; 3691fa58c77SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_wwm: 3701fa58c77SDiana; DAGISEL-GFX11: ; %bb.0: 3711fa58c77SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 37316cda01dSCarl Ritson; DAGISEL-GFX11-NEXT: s_or_saveexec_b32 s4, -1 3741fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 37516cda01dSCarl Ritson; DAGISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4 37616cda01dSCarl Ritson; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, s4 3771fa58c77SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 3781fa58c77SDiana; DAGISEL-GFX11-NEXT: s_nop 3791fa58c77SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 380bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 3811fa58c77SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1 3821fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 3831fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 3841fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 3851fa58c77SDiana; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3861fa58c77SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2 3871fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 3881fa58c77SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 3891fa58c77SDiana; 3901fa58c77SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_wwm: 3911fa58c77SDiana; DAGISEL-GFX10: ; %bb.0: 3921fa58c77SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 393bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 39416cda01dSCarl Ritson; DAGISEL-GFX10-NEXT: s_or_saveexec_b32 s4, -1 3951fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 39616cda01dSCarl Ritson; DAGISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4 39716cda01dSCarl Ritson; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, s4 3981fa58c77SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 3991fa58c77SDiana; DAGISEL-GFX10-NEXT: s_nop 4001fa58c77SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 401bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 4021fa58c77SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1 4031fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 4041fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 4051fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 4061fa58c77SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2 4071fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 4081fa58c77SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 4091fa58c77SDiana %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4) 4101fa58c77SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 4111fa58c77SDiana %w = call i32 @llvm.amdgcn.wwm(i32 %i) 4121fa58c77SDiana %c = insertelement <3 x i32> %b, i32 %w, i32 0 4131fa58c77SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0) 4141fa58c77SDiana unreachable 4151fa58c77SDiana} 4161fa58c77SDiana 4177f5d59b3SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) { 4187f5d59b3SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: 4197f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 4207f5d59b3SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4211fa58c77SDiana; GISEL-GFX11-NEXT: s_clause 0x1 422bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 423bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off 4247f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 425eb3c02fdSDiana; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 4267f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 4277f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 4287f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 4297f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 4307f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 4317f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 4321fa58c77SDiana; GISEL-GFX11-NEXT: s_clause 0x1 433bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off 434bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v11, off, off offset:4 435eb3c02fdSDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 4367f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 4377f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 4387f5d59b3SDiana; 4397f5d59b3SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: 4407f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 4417f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 442bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill 443bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 4447f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 445eb3c02fdSDiana; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 4467f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 4477f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 4487f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 4497f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 4507f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 4517f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 4521fa58c77SDiana; GISEL-GFX10-NEXT: s_clause 0x1 453bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 454bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], 0 offset:4 455eb3c02fdSDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 4567f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 4577f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 4587f5d59b3SDiana; 4597f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: 4607f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 4617f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4621fa58c77SDiana; DAGISEL-GFX11-NEXT: s_clause 0x1 463bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 464bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off 4657f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 466eb3c02fdSDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 4677f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 4687f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 4697f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 4707f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 4717f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 4727f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 4731fa58c77SDiana; DAGISEL-GFX11-NEXT: s_clause 0x1 474bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off 475bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v11, off, off offset:4 476eb3c02fdSDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 4777f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 4787f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 4797f5d59b3SDiana; 4807f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: 4817f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 4827f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 483bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill 484bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 4857f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 486eb3c02fdSDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 4877f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 4887f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 4897f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 4907f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 4917f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 4927f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 4931fa58c77SDiana; DAGISEL-GFX10-NEXT: s_clause 0x1 494bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 495bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], 0 offset:4 496eb3c02fdSDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 4977f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 4987f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 4997f5d59b3SDiana call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"() 5007f5d59b3SDiana call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 5017f5d59b3SDiana unreachable 5027f5d59b3SDiana} 5037f5d59b3SDiana 5047f5d59b3SDianadefine amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) { 5057f5d59b3SDiana; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: 5067f5d59b3SDiana; GISEL-GFX11: ; %bb.0: 5077f5d59b3SDiana; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 508bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 5097f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s2, s0 5101fa58c77SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 5117f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMSTART 5127f5d59b3SDiana; GISEL-GFX11-NEXT: s_nop 5137f5d59b3SDiana; GISEL-GFX11-NEXT: ;;#ASMEND 514bc6955f1SDiana Picus; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 5157f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo 5167f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi 5177f5d59b3SDiana; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 5187f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 s0, s2 5197f5d59b3SDiana; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 5207f5d59b3SDiana; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 5217f5d59b3SDiana; 5227f5d59b3SDiana; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: 5237f5d59b3SDiana; GISEL-GFX10: ; %bb.0: 5247f5d59b3SDiana; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 525bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 5267f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s2, s0 5271fa58c77SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 5287f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMSTART 5297f5d59b3SDiana; GISEL-GFX10-NEXT: s_nop 5307f5d59b3SDiana; GISEL-GFX10-NEXT: ;;#ASMEND 531bc6955f1SDiana Picus; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 5327f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo 5337f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi 5347f5d59b3SDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 5357f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 s0, s2 5367f5d59b3SDiana; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 5377f5d59b3SDiana; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 5387f5d59b3SDiana; 5397f5d59b3SDiana; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: 5407f5d59b3SDiana; DAGISEL-GFX11: ; %bb.0: 5417f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 542bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill 5437f5d59b3SDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 5447f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0 5457f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMSTART 5467f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_nop 5477f5d59b3SDiana; DAGISEL-GFX11-NEXT: ;;#ASMEND 548bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload 54939830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi 550eb3c02fdSDiana; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 5511fa58c77SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo 5527f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2 5537f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 5547f5d59b3SDiana; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 5557f5d59b3SDiana; 5567f5d59b3SDiana; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: 5577f5d59b3SDiana; DAGISEL-GFX10: ; %bb.0: 5587f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 559bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill 5607f5d59b3SDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 5617f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0 5627f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMSTART 5637f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_nop 5647f5d59b3SDiana; DAGISEL-GFX10-NEXT: ;;#ASMEND 565bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload 56639830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi 567eb3c02fdSDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 5681fa58c77SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo 5697f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2 5707f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 5717f5d59b3SDiana; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 5727f5d59b3SDiana %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1> 5737f5d59b3SDiana %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1> 5747f5d59b3SDiana call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 5757f5d59b3SDiana call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_preserve_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0) 5767f5d59b3SDiana unreachable 5777f5d59b3SDiana} 5787f5d59b3SDiana 5797f5d59b3SDiana; Note that amdgpu_cs_chain_preserve functions are not allowed to call 5807f5d59b3SDiana; llvm.amdgcn.cs.chain with more vgpr args than they received as parameters. 5817f5d59b3SDiana 58239830feaSDiana Picusdefine amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_dont_realign_stack(i32 %idx) { 58339830feaSDiana Picus; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: 58439830feaSDiana Picus; GISEL-GFX11: ; %bb.0: 58539830feaSDiana Picus; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 58639830feaSDiana Picus; GISEL-GFX11-NEXT: s_mov_b32 s3, 4 58739830feaSDiana Picus; GISEL-GFX11-NEXT: s_mov_b32 s2, 3 58839830feaSDiana Picus; GISEL-GFX11-NEXT: s_mov_b32 s1, 2 58939830feaSDiana Picus; GISEL-GFX11-NEXT: s_mov_b32 s0, 1 59039830feaSDiana Picus; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8 591eb3c02fdSDiana; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 592*f2eeb3dcSMatt Arsenault; GISEL-GFX11-NEXT: v_mov_b32_e32 v4, v0 59339830feaSDiana Picus; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 59439830feaSDiana Picus; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 59539830feaSDiana Picus; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc 59639830feaSDiana Picus; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 59739830feaSDiana Picus; GISEL-GFX11-NEXT: s_endpgm 59839830feaSDiana Picus; 59939830feaSDiana Picus; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: 60039830feaSDiana Picus; GISEL-GFX10: ; %bb.0: 60139830feaSDiana Picus; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 60239830feaSDiana Picus; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8 60339830feaSDiana Picus; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1 604eb3c02fdSDiana; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2 60539830feaSDiana Picus; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3 60639830feaSDiana Picus; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4 60739830feaSDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen 60839830feaSDiana Picus; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 60939830feaSDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4 61039830feaSDiana Picus; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 61139830feaSDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8 61239830feaSDiana Picus; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 61339830feaSDiana Picus; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12 61439830feaSDiana Picus; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 61539830feaSDiana Picus; GISEL-GFX10-NEXT: s_endpgm 61639830feaSDiana Picus; 61739830feaSDiana Picus; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: 61839830feaSDiana Picus; DAGISEL-GFX11: ; %bb.0: 61939830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62039830feaSDiana Picus; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 62139830feaSDiana Picus; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 622bc6955f1SDiana Picus; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, 0 62339830feaSDiana Picus; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc 62439830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 62539830feaSDiana Picus; DAGISEL-GFX11-NEXT: s_endpgm 62639830feaSDiana Picus; 62739830feaSDiana Picus; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: 62839830feaSDiana Picus; DAGISEL-GFX10: ; %bb.0: 62939830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 63039830feaSDiana Picus; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4 631bc6955f1SDiana Picus; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, 0 632eb3c02fdSDiana; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3 63339830feaSDiana Picus; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2 63439830feaSDiana Picus; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1 63539830feaSDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12 63639830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 63739830feaSDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8 63839830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 63939830feaSDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4 64039830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 64139830feaSDiana Picus; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen 64239830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 64339830feaSDiana Picus; DAGISEL-GFX10-NEXT: s_endpgm 64439830feaSDiana Picus %alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5) 64539830feaSDiana Picus %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx 64639830feaSDiana Picus store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32 64739830feaSDiana Picus ret void 64839830feaSDiana Picus} 64939830feaSDiana Picus 6507f5d59b3SDianadeclare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) 6517f5d59b3SDianadeclare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>) 6527f5d59b3SDianadeclare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>) 6537f5d59b3SDiana 6547f5d59b3SDianadeclare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) 6557f5d59b3SDianadeclare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>) 6561fa58c77SDiana 6571fa58c77SDianadeclare i32 @llvm.amdgcn.set.inactive(i32, i32) 6581fa58c77SDianadeclare i32 @llvm.amdgcn.wwm(i32) 659