# xref: /llvm-project/llvm/test/CodeGen/AMDGPU/load-store-opt-dlc.mir (revision da13754103b8880811f4c164d858c6dd3c393927)
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck %s

# This test makes sure that si-load-store-opt combines adjacent memory
# operations only when it is correct to do so with respect to the DLC bit:
# two stores are merged when their DLC bits agree (both clear or both set, and
# the merged store keeps that setting) and are left alone when they differ.

--- |
  ; IR stubs backing the MIR functions of the same name in this file. The
  ; machine memory operands there refer to %ir.out.gep.1, so that value name
  ; has to exist in every function. All four bodies are intentionally identical;
  ; the variation under test lives in the MIR store operands, not in the IR.
  define amdgpu_kernel void @test1(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test3(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }

  define amdgpu_kernel void @test4(ptr addrspace(1) %out) {
    %out.gep.1 = getelementptr i32, ptr addrspace(1) %out, i32 1
    store i32 123, ptr addrspace(1) %out.gep.1
    store i32 456, ptr addrspace(1) %out
    ret void
  }
...

# test1: both stores have a cache-policy operand of 0 (DLC clear), so the two
# adjacent dword stores at offsets 4 and 8 are expected to be merged into a
# single dwordx2 store at offset 4 whose cache-policy operand is still 0.
# CHECK-LABEL: name: test1
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test1
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; The two 32-bit values that get stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Constant upper dwords of the buffer resource.
    ; NOTE(review): presumably num_records = -1 and the descriptor flags word;
    ; confirm against the AMDGPU buffer resource layout.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load the 64-bit pointer argument through the incoming $sgpr0_sgpr1 pair
    ; (copied into %0) and assemble the 128-bit resource: the pointer fills
    ; sub0_sub1, the two constants fill sub2 and sub3.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Store operands: data, resource, soffset, immediate offset, cache policy,
    ; and a trailing 0 flag. Both stores use cache policy 0.
    ; NOTE(review): both memory operands name %ir.out.gep.1 even though the
    ; offsets differ; kept as-is because the expected output depends on it.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test2: only the first store has the DLC bit set (cache-policy operand 4 vs.
# 0), so the two stores are expected to stay separate and unchanged.
# CHECK-LABEL: name: test2
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test2
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; The two 32-bit values that get stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Constant upper dwords of the buffer resource.
    ; NOTE(review): presumably num_records = -1 and the descriptor flags word;
    ; confirm against the AMDGPU buffer resource layout.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load the 64-bit pointer argument through the incoming $sgpr0_sgpr1 pair
    ; (copied into %0) and assemble the 128-bit resource: the pointer fills
    ; sub0_sub1, the two constants fill sub2 and sub3.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Store operands: data, resource, soffset, immediate offset, cache policy,
    ; and a trailing 0 flag. The cache policies differ here: 4 on the first
    ; store, 0 on the second.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test3: mirror image of test2 - only the second store has the DLC bit set
# (cache-policy operand 0 vs. 4), so the stores are again expected to stay
# separate and unchanged.
# CHECK-LABEL: name: test3
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
---
name: test3
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; The two 32-bit values that get stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Constant upper dwords of the buffer resource.
    ; NOTE(review): presumably num_records = -1 and the descriptor flags word;
    ; confirm against the AMDGPU buffer resource layout.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load the 64-bit pointer argument through the incoming $sgpr0_sgpr1 pair
    ; (copied into %0) and assemble the 128-bit resource: the pointer fills
    ; sub0_sub1, the two constants fill sub2 and sub3.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Store operands: data, resource, soffset, immediate offset, cache policy,
    ; and a trailing 0 flag. The cache policies differ here: 0 on the first
    ; store, 4 on the second.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...

# test4: both stores have the DLC bit set (cache-policy operand 4 on each), so
# they are expected to be merged into a single dwordx2 store at offset 4 that
# keeps cache-policy operand 4.
# CHECK-LABEL: name: test4
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 4, 0, implicit $exec :: (store (s64) into %ir.out.gep.1, align 4, addrspace 1)
---
name: test4
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '' }
body: |
  bb.0 (%ir-block.0):
    liveins: $sgpr0_sgpr1

    ; The two 32-bit values that get stored.
    $vgpr0 = V_MOV_B32_e32 123, implicit $exec
    $vgpr1 = V_MOV_B32_e32 456, implicit $exec

    ; Constant upper dwords of the buffer resource.
    ; NOTE(review): presumably num_records = -1 and the descriptor flags word;
    ; confirm against the AMDGPU buffer resource layout.
    $sgpr2 = S_MOV_B32 -1
    $sgpr3 = S_MOV_B32 61440

    ; Load the 64-bit pointer argument through the incoming $sgpr0_sgpr1 pair
    ; (copied into %0) and assemble the 128-bit resource: the pointer fills
    ; sub0_sub1, the two constants fill sub2 and sub3.
    %0:sgpr_64 = COPY $sgpr0_sgpr1
    %1:sgpr_64 = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (dereferenceable invariant load (s64) from `ptr addrspace(4) undef`, addrspace 4)
    %2:sgpr_32 = COPY $sgpr2
    %3:sgpr_32 = COPY $sgpr3
    %4:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0_sub1, %2, %subreg.sub2, %3, %subreg.sub3

    %5:vgpr_32 = COPY $vgpr0
    %6:vgpr_32 = COPY $vgpr1

    ; Store operands: data, resource, soffset, immediate offset, cache policy,
    ; and a trailing 0 flag. Both stores use cache policy 4.
    ; NOTE(review): both memory operands name %ir.out.gep.1 even though the
    ; offsets differ; kept as-is because the expected output depends on it.
    BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)
    BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 4, 0, implicit $exec :: (store (s32) into %ir.out.gep.1, addrspace 1)

    S_ENDPGM 0
...
