; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
; RUN: llc -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12

; Every function below has the same shape: a getelementptr with a constant byte
; offset is computed in the entry block, and a volatile scalar load through it
; sits in a countdown loop. The checks record, per subtarget, whether the
; constant ends up as an immediate on the load inside the loop or as an add to
; the base pointer ahead of the loop.

; Byte offset 400: the expected output keeps an immediate on the load for every
; subtarget (printed as 0x64 = 400/4 on gfx600/gfx700, 0x190 on the others).
define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX67-LABEL: test_sink_smem_offset_400:
; GFX67:       ; %bb.0: ; %entry
; GFX67-NEXT:  .LBB0_1: ; %loop
; GFX67-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
; GFX67-NEXT:    s_load_dword s3, s[0:1], 0x64
; GFX67-NEXT:    s_add_i32 s2, s2, -1
; GFX67-NEXT:    s_cmp_lg_u32 s2, 0
; GFX67-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX67-NEXT:  ; %bb.2: ; %end
; GFX67-NEXT:    s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_400:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:  .LBB0_1: ; %loop
; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    s_load_dword s3, s[0:1], 0x190
; GFX89-NEXT:    s_add_i32 s2, s2, -1
; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
; GFX89-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX89-NEXT:  ; %bb.2: ; %end
; GFX89-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_400:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB0_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x190
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB0_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Byte offset 4000: gfx600 is expected to add 0xfa0 to the base before the loop;
; the other subtargets keep an immediate on the load (0x3e8 = 4000/4 on gfx700,
; 0xfa0 on gfx801/gfx900/gfx1200).
define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6-LABEL: test_sink_smem_offset_4000:
; GFX6:       ; %bb.0: ; %entry
; GFX6-NEXT:    s_add_u32 s0, s0, 0xfa0
; GFX6-NEXT:    s_addc_u32 s1, s1, 0
; GFX6-NEXT:  .LBB1_1: ; %loop
; GFX6-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6-NEXT:    s_add_i32 s2, s2, -1
; GFX6-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX6-NEXT:  ; %bb.2: ; %end
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB1_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x3e8
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_4000:
; GFX89:       ; %bb.0: ; %entry
; GFX89-NEXT:  .LBB1_1: ; %loop
; GFX89-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
; GFX89-NEXT:    s_load_dword s3, s[0:1], 0xfa0
; GFX89-NEXT:    s_add_i32 s2, s2, -1
; GFX89-NEXT:    s_cmp_lg_u32 s2, 0
; GFX89-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX89-NEXT:  ; %bb.2: ; %end
; GFX89-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB1_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0xfa0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB1_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Byte offset 4000000: gfx600/gfx801/gfx900 are expected to add 0x3d0900 to the
; base before the loop; gfx700 (0xf4240 = 4000000/4) and gfx1200 (0x3d0900) keep
; an immediate on the load.
define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_4000000:
; GFX689:       ; %bb.0: ; %entry
; GFX689-NEXT:    s_add_u32 s0, s0, 0x3d0900
; GFX689-NEXT:    s_addc_u32 s1, s1, 0
; GFX689-NEXT:  .LBB2_1: ; %loop
; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT:    s_add_i32 s2, s2, -1
; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
; GFX689-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX689-NEXT:  ; %bb.2: ; %end
; GFX689-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB2_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0xf4240
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:  .LBB2_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x3d0900
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB2_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Byte offset 40000000: only gfx700 is expected to keep an immediate on the load
; (0x989680 = 40000000/4); every other subtarget adds 0x2625a00 to the base
; before the loop.
define amdgpu_cs void @test_sink_smem_offset_40000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_40000000:
; GFX689:       ; %bb.0: ; %entry
; GFX689-NEXT:    s_add_u32 s0, s0, 0x2625a00
; GFX689-NEXT:    s_addc_u32 s1, s1, 0
; GFX689-NEXT:  .LBB3_1: ; %loop
; GFX689-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT:    s_waitcnt lgkmcnt(0)
; GFX689-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT:    s_add_i32 s2, s2, -1
; GFX689-NEXT:    s_cmp_lg_u32 s2, 0
; GFX689-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX689-NEXT:  ; %bb.2: ; %end
; GFX689-NEXT:    s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_40000000:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:  .LBB3_1: ; %loop
; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x989680
; GFX7-NEXT:    s_add_i32 s2, s2, -1
; GFX7-NEXT:    s_cmp_lg_u32 s2, 0
; GFX7-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX7-NEXT:  ; %bb.2: ; %end
; GFX7-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x2625a00
; GFX12-NEXT:  .LBB3_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB3_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Byte offset 40000000000 (0x9_502f9000, wider than 32 bits): every subtarget is
; expected to add the constant to the base before the loop and load with a zero
; immediate; gfx1200 first materializes the constant in s[4:5].
define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_40000000000:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_u32 s0, s0, 0x502f9000
; GFX6789-NEXT:    s_addc_u32 s1, s1, 9
; GFX6789-NEXT:  .LBB4_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT:    s_add_i32 s2, s2, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB4_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000000:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_mov_b32 s4, 0x502f9000
; GFX12-NEXT:    s_mov_b32 s5, 9
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT:  .LBB4_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB4_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Byte offset -400: every subtarget is expected to add the sign-extended
; constant (0xfffffe70 low half, -1 high half) to the base before the loop and
; load with a zero immediate.
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_u32 s0, s0, 0xfffffe70
; GFX6789-NEXT:    s_addc_u32 s1, s1, -1
; GFX6789-NEXT:  .LBB5_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT:    s_add_i32 s2, s2, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB5_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_movk_i32 s4, 0xfe70
; GFX12-NEXT:    s_mov_b32 s5, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT:  .LBB5_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT:    s_add_co_i32 s2, s2, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s2, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB5_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Same for address space 6, constant 32-bit.
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX6789:       ; %bb.0: ; %entry
; GFX6789-NEXT:    s_add_i32 s2, s0, 0xfffffe70
; GFX6789-NEXT:    s_mov_b32 s3, 0
; GFX6789-NEXT:  .LBB6_1: ; %loop
; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6789-NEXT:    s_load_dword s0, s[2:3], 0x0
; GFX6789-NEXT:    s_add_i32 s1, s1, -1
; GFX6789-NEXT:    s_cmp_lg_u32 s1, 0
; GFX6789-NEXT:    s_cbranch_scc1 .LBB6_1
; GFX6789-NEXT:  ; %bb.2: ; %end
; GFX6789-NEXT:    s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_add_co_i32 s2, s0, 0xfffffe70
; GFX12-NEXT:    s_mov_b32 s3, 0
; GFX12-NEXT:  .LBB6_1: ; %loop
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_load_b32 s0, s[2:3], 0x0
; GFX12-NEXT:    s_add_co_i32 s1, s1, -1
; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
; GFX12-NEXT:    s_cbranch_scc1 .LBB6_1
; GFX12-NEXT:  ; %bb.2: ; %end
; GFX12-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
  br label %loop

loop:
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  %load = load volatile i32, ptr addrspace(6) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX678: {{.*}}
; GFX9: {{.*}}