# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s

# The purpose of this test is to make sure we combine adjacent memory
# operations only when their DLC bits match, and that a merged store keeps
# the DLC bit. In the BUFFER_STORE_DWORD_OFFSET instructions below, the
# immediate operand after the offset is the cache policy; on gfx10 a value
# of 4 sets the DLC bit.

--- |
  define amdgpu_kernel void @test1(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test3(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test4(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }
...

# Both stores leave DLC clear, so they are merged into a single dwordx2 store.
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# Only the first store sets DLC; the mismatched cache policies prevent merging.
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...
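
# Only the second store sets DLC, so the two stores are again not merged.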
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# Both stores set DLC, so they are merged and the combined store keeps DLC set.
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...