xref: /llvm-project/llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
3
4;; Older intrinsics that take <4 x i32>
5
; Raw buffer load of a float where %soffset is an ordinary function argument.
; The expected MIR takes the value copied from $vgpr1 (the second i32
; argument, presumably %soffset), reads it with V_READFIRSTLANE_B32 inside a
; waterfall loop (bb.1/bb.2), and passes the resulting SGPR to
; BUFFER_LOAD_DWORD_OFFEN. bb.3 restores $exec from the copy saved in bb.0.
6define float @llvm_amdgcn_raw_buffer_load_f32(i32 %voffset, i32 %soffset) {
7  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_f32
8  ; GFX908: bb.0 (%ir-block.0):
9  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
10  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
11  ; GFX908-NEXT: {{  $}}
12  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
13  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
14  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
15  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
16  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
17  ; GFX908-NEXT: {{  $}}
18  ; GFX908-NEXT: bb.1:
19  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
20  ; GFX908-NEXT: {{  $}}
21  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
22  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
23  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
24  ; GFX908-NEXT: {{  $}}
25  ; GFX908-NEXT: bb.2:
26  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
27  ; GFX908-NEXT: {{  $}}
28  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
29  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
30  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
31  ; GFX908-NEXT: {{  $}}
32  ; GFX908-NEXT: bb.3:
33  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
34  ; GFX908-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
35  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0
36  %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
37  ret float %val
38}
39
; Typed (tbuffer) variant of the float load with %soffset passed as a function
; argument. The expected MIR wraps TBUFFER_LOAD_FORMAT_X_OFFEN in a waterfall
; loop (bb.1/bb.2) that reads the value copied from $vgpr1 (presumably
; %soffset) with V_READFIRSTLANE_B32; bb.3 restores the saved $exec.
40define float @llvm_amdgcn_raw_tbuffer_load_f32(i32 %voffset, i32 %soffset) {
41  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_f32
42  ; GFX908: bb.0 (%ir-block.0):
43  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
44  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
45  ; GFX908-NEXT: {{  $}}
46  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
47  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
48  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
49  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
50  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
51  ; GFX908-NEXT: {{  $}}
52  ; GFX908-NEXT: bb.1:
53  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
54  ; GFX908-NEXT: {{  $}}
55  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
56  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
57  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
58  ; GFX908-NEXT: {{  $}}
59  ; GFX908-NEXT: bb.2:
60  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
61  ; GFX908-NEXT: {{  $}}
62  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
63  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
64  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
65  ; GFX908-NEXT: {{  $}}
66  ; GFX908-NEXT: bb.3:
67  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
68  ; GFX908-NEXT:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
69  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0
70  %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
71  ret float %val
72}
73
; Raw buffer load of <2 x float> with %soffset passed as a function argument.
; The expected MIR wraps BUFFER_LOAD_DWORDX2_OFFEN in a waterfall loop
; (bb.1/bb.2) that reads the value copied from $vgpr1 (presumably %soffset)
; with V_READFIRSTLANE_B32. After $exec is restored in bb.3, the 64-bit result
; is split into sub0/sub1 and returned in $vgpr0/$vgpr1.
74define <2 x float> @llvm_amdgcn_raw_buffer_load_v2f32(i32 %voffset, i32 %soffset) {
75  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v2f32
76  ; GFX908: bb.0 (%ir-block.0):
77  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
78  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
79  ; GFX908-NEXT: {{  $}}
80  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
81  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
82  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
83  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
84  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
85  ; GFX908-NEXT: {{  $}}
86  ; GFX908-NEXT: bb.1:
87  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
88  ; GFX908-NEXT: {{  $}}
89  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
90  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
91  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
92  ; GFX908-NEXT: {{  $}}
93  ; GFX908-NEXT: bb.2:
94  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
95  ; GFX908-NEXT: {{  $}}
96  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8)
97  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
98  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
99  ; GFX908-NEXT: {{  $}}
100  ; GFX908-NEXT: bb.3:
101  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
102  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
103  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
104  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
105  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
106  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
107  %val = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
108  ret <2 x float> %val
109}
110
; Typed (tbuffer) load of <2 x float> with %soffset passed as a function
; argument. The expected MIR wraps TBUFFER_LOAD_FORMAT_XY_OFFEN in a waterfall
; loop (bb.1/bb.2) that reads the value copied from $vgpr1 (presumably
; %soffset) with V_READFIRSTLANE_B32. After $exec is restored in bb.3, the
; 64-bit result is split into sub0/sub1 and returned in $vgpr0/$vgpr1.
111define <2 x float> @llvm_amdgcn_raw_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) {
112  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v2f32
113  ; GFX908: bb.0 (%ir-block.0):
114  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
115  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
116  ; GFX908-NEXT: {{  $}}
117  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
118  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
119  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
120  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
121  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
122  ; GFX908-NEXT: {{  $}}
123  ; GFX908-NEXT: bb.1:
124  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
125  ; GFX908-NEXT: {{  $}}
126  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
127  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
128  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
129  ; GFX908-NEXT: {{  $}}
130  ; GFX908-NEXT: bb.2:
131  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
132  ; GFX908-NEXT: {{  $}}
133  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 8)
134  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
135  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
136  ; GFX908-NEXT: {{  $}}
137  ; GFX908-NEXT: bb.3:
138  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
139  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
140  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
141  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
142  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
143  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
144  %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
145  ret <2 x float> %val
146}
147
; Raw buffer load of <3 x float> with %soffset passed as a function argument.
; The expected MIR wraps BUFFER_LOAD_DWORDX3_OFFEN in a waterfall loop
; (bb.1/bb.2) that reads the value copied from $vgpr1 (presumably %soffset)
; with V_READFIRSTLANE_B32. After $exec is restored in bb.3, the 96-bit result
; is split into sub0..sub2 and returned in $vgpr0..$vgpr2.
148define <3 x float> @llvm_amdgcn_raw_buffer_load_v3f32(i32 %voffset, i32 %soffset) {
149  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v3f32
150  ; GFX908: bb.0 (%ir-block.0):
151  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
152  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
153  ; GFX908-NEXT: {{  $}}
154  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
155  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
156  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
157  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
158  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
159  ; GFX908-NEXT: {{  $}}
160  ; GFX908-NEXT: bb.1:
161  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
162  ; GFX908-NEXT: {{  $}}
163  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
164  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
165  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
166  ; GFX908-NEXT: {{  $}}
167  ; GFX908-NEXT: bb.2:
168  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
169  ; GFX908-NEXT: {{  $}}
170  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8)
171  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
172  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
173  ; GFX908-NEXT: {{  $}}
174  ; GFX908-NEXT: bb.3:
175  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
176  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
177  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
178  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
179  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
180  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
181  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
182  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
183  %val = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
184  ret <3 x float> %val
185}
186
; Typed (tbuffer) load of <3 x float> with %soffset passed as a function
; argument. The expected MIR wraps TBUFFER_LOAD_FORMAT_XYZ_OFFEN in a
; waterfall loop (bb.1/bb.2) that reads the value copied from $vgpr1
; (presumably %soffset) with V_READFIRSTLANE_B32. After $exec is restored in
; bb.3, the 96-bit result is split into sub0..sub2 and returned in
; $vgpr0..$vgpr2.
187define <3 x float> @llvm_amdgcn_raw_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) {
188  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v3f32
189  ; GFX908: bb.0 (%ir-block.0):
190  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
191  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
192  ; GFX908-NEXT: {{  $}}
193  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
194  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
195  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
196  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
197  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
198  ; GFX908-NEXT: {{  $}}
199  ; GFX908-NEXT: bb.1:
200  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
201  ; GFX908-NEXT: {{  $}}
202  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
203  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
204  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
205  ; GFX908-NEXT: {{  $}}
206  ; GFX908-NEXT: bb.2:
207  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
208  ; GFX908-NEXT: {{  $}}
209  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 1, addrspace 8)
210  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
211  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
212  ; GFX908-NEXT: {{  $}}
213  ; GFX908-NEXT: bb.3:
214  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
215  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
216  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
217  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
218  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
219  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
220  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
221  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
222  %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
223  ret <3 x float> %val
224}
225
; Raw buffer load of <4 x float> with %soffset passed as a function argument.
; The expected MIR wraps BUFFER_LOAD_DWORDX4_OFFEN in a waterfall loop
; (bb.1/bb.2) that reads the value copied from $vgpr1 (presumably %soffset)
; with V_READFIRSTLANE_B32. After $exec is restored in bb.3, the 128-bit
; result is split into sub0..sub3 and returned in $vgpr0..$vgpr3.
226define <4 x float> @llvm_amdgcn_raw_buffer_load_v4f32(i32 %voffset, i32 %soffset) {
227  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_load_v4f32
228  ; GFX908: bb.0 (%ir-block.0):
229  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
230  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
231  ; GFX908-NEXT: {{  $}}
232  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
233  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
234  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
235  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
236  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
237  ; GFX908-NEXT: {{  $}}
238  ; GFX908-NEXT: bb.1:
239  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
240  ; GFX908-NEXT: {{  $}}
241  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
242  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
243  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
244  ; GFX908-NEXT: {{  $}}
245  ; GFX908-NEXT: bb.2:
246  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
247  ; GFX908-NEXT: {{  $}}
248  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
249  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
250  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
251  ; GFX908-NEXT: {{  $}}
252  ; GFX908-NEXT: bb.3:
253  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
254  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
255  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
256  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
257  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
258  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
259  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
260  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
261  ; GFX908-NEXT:   $vgpr3 = COPY [[COPY5]]
262  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
263  %val = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
264  ret <4 x float> %val
265}
266
; Typed (tbuffer) load of <4 x float> with %soffset passed as a function
; argument. The expected MIR wraps TBUFFER_LOAD_FORMAT_XYZW_OFFEN in a
; waterfall loop (bb.1/bb.2) that reads the value copied from $vgpr1
; (presumably %soffset) with V_READFIRSTLANE_B32. After $exec is restored in
; bb.3, the 128-bit result is split into sub0..sub3 and returned in
; $vgpr0..$vgpr3.
267define <4 x float> @llvm_amdgcn_raw_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) {
268  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_load_v4f32
269  ; GFX908: bb.0 (%ir-block.0):
270  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
271  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
272  ; GFX908-NEXT: {{  $}}
273  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
274  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
275  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
276  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
277  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
278  ; GFX908-NEXT: {{  $}}
279  ; GFX908-NEXT: bb.1:
280  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
281  ; GFX908-NEXT: {{  $}}
282  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
283  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
284  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
285  ; GFX908-NEXT: {{  $}}
286  ; GFX908-NEXT: bb.2:
287  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
288  ; GFX908-NEXT: {{  $}}
289  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 8)
290  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
291  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
292  ; GFX908-NEXT: {{  $}}
293  ; GFX908-NEXT: bb.3:
294  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
295  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
296  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
297  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
298  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3
299  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
300  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
301  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
302  ; GFX908-NEXT:   $vgpr3 = COPY [[COPY5]]
303  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
304  %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
305  ret <4 x float> %val
306}
307
; Raw buffer store of a float with %soffset passed as a function argument.
; The expected MIR wraps BUFFER_STORE_DWORD_OFFEN_exact in a waterfall loop
; (bb.1/bb.2) that reads the value copied from $vgpr2 (the last argument,
; presumably %soffset) with V_READFIRSTLANE_B32; bb.3 restores the saved $exec
; before returning.
308define void @llvm_amdgcn_raw_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
309  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_f32
310  ; GFX908: bb.0 (%ir-block.0):
311  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
312  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
313  ; GFX908-NEXT: {{  $}}
314  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
315  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
316  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
317  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
318  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
319  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
320  ; GFX908-NEXT: {{  $}}
321  ; GFX908-NEXT: bb.1:
322  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
323  ; GFX908-NEXT: {{  $}}
324  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
325  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
326  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
327  ; GFX908-NEXT: {{  $}}
328  ; GFX908-NEXT: bb.2:
329  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
330  ; GFX908-NEXT: {{  $}}
331  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
332  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
333  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
334  ; GFX908-NEXT: {{  $}}
335  ; GFX908-NEXT: bb.3:
336  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
337  ; GFX908-NEXT:   SI_RETURN
338  call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
339  ret void
340}
341
; Typed (tbuffer) store of a float with %soffset passed as a function
; argument. The expected MIR wraps TBUFFER_STORE_FORMAT_X_OFFEN_exact in a
; waterfall loop (bb.1/bb.2) that reads the value copied from $vgpr2 (the last
; argument, presumably %soffset) with V_READFIRSTLANE_B32; bb.3 restores the
; saved $exec before returning.
342define void @llvm_amdgcn_raw_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
343  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_f32
344  ; GFX908: bb.0 (%ir-block.0):
345  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
346  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
347  ; GFX908-NEXT: {{  $}}
348  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
349  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
350  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
351  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
352  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
353  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
354  ; GFX908-NEXT: {{  $}}
355  ; GFX908-NEXT: bb.1:
356  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
357  ; GFX908-NEXT: {{  $}}
358  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
359  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
360  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
361  ; GFX908-NEXT: {{  $}}
362  ; GFX908-NEXT: bb.2:
363  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
364  ; GFX908-NEXT: {{  $}}
365  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
366  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
367  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
368  ; GFX908-NEXT: {{  $}}
369  ; GFX908-NEXT: bb.3:
370  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
371  ; GFX908-NEXT:   SI_RETURN
372  call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
373  ret void
374}
375
; Raw buffer store of <2 x float> with %soffset passed as a function argument.
; The expected MIR packs $vgpr0/$vgpr1 into a vreg_64 via REG_SEQUENCE, then
; wraps BUFFER_STORE_DWORDX2_OFFEN_exact in a waterfall loop (bb.1/bb.2) that
; reads the value copied from $vgpr3 (the last argument, presumably %soffset)
; with V_READFIRSTLANE_B32; bb.3 restores the saved $exec before returning.
376define void @llvm_amdgcn_raw_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
377  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v2f32
378  ; GFX908: bb.0 (%ir-block.0):
379  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
380  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
381  ; GFX908-NEXT: {{  $}}
382  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
383  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
384  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
385  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
386  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
387  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
388  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
389  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
390  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
391  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
392  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
393  ; GFX908-NEXT: {{  $}}
394  ; GFX908-NEXT: bb.1:
395  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
396  ; GFX908-NEXT: {{  $}}
397  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
398  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
399  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
400  ; GFX908-NEXT: {{  $}}
401  ; GFX908-NEXT: bb.2:
402  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
403  ; GFX908-NEXT: {{  $}}
404  ; GFX908-NEXT:   BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
405  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
406  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
407  ; GFX908-NEXT: {{  $}}
408  ; GFX908-NEXT: bb.3:
409  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
410  ; GFX908-NEXT:   SI_RETURN
411  call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
412  ret void
413}
414
; Typed (tbuffer) store of <2 x float> with %soffset passed as a function
; argument. The expected MIR packs $vgpr0/$vgpr1 into a vreg_64 via
; REG_SEQUENCE, then wraps TBUFFER_STORE_FORMAT_XY_OFFEN_exact in a waterfall
; loop (bb.1/bb.2) that reads the value copied from $vgpr3 (the last argument,
; presumably %soffset) with V_READFIRSTLANE_B32; bb.3 restores the saved $exec
; before returning.
415define void @llvm_amdgcn_raw_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
416  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v2f32
417  ; GFX908: bb.0 (%ir-block.0):
418  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
419  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
420  ; GFX908-NEXT: {{  $}}
421  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
422  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
423  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
424  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
425  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
426  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
427  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
428  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
429  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
430  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
431  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
432  ; GFX908-NEXT: {{  $}}
433  ; GFX908-NEXT: bb.1:
434  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
435  ; GFX908-NEXT: {{  $}}
436  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
437  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
438  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
439  ; GFX908-NEXT: {{  $}}
440  ; GFX908-NEXT: bb.2:
441  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
442  ; GFX908-NEXT: {{  $}}
443  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
444  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
445  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
446  ; GFX908-NEXT: {{  $}}
447  ; GFX908-NEXT: bb.3:
448  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
449  ; GFX908-NEXT:   SI_RETURN
450  call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
451  ret void
452}
453
; Raw buffer store of <3 x float> with %soffset passed as a function argument.
; The expected MIR packs $vgpr0..$vgpr2 into a vreg_96 via REG_SEQUENCE, then
; wraps BUFFER_STORE_DWORDX3_OFFEN_exact in a waterfall loop (bb.1/bb.2) that
; reads the value copied from $vgpr4 (the last argument, presumably %soffset)
; with V_READFIRSTLANE_B32; bb.3 restores the saved $exec before returning.
454define void @llvm_amdgcn_raw_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
455  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v3f32
456  ; GFX908: bb.0 (%ir-block.0):
457  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
458  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
459  ; GFX908-NEXT: {{  $}}
460  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
461  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
462  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
463  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
464  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
465  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
466  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
467  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
468  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
469  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
470  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
471  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
472  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
473  ; GFX908-NEXT: {{  $}}
474  ; GFX908-NEXT: bb.1:
475  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
476  ; GFX908-NEXT: {{  $}}
477  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
478  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
479  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
480  ; GFX908-NEXT: {{  $}}
481  ; GFX908-NEXT: bb.2:
482  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
483  ; GFX908-NEXT: {{  $}}
484  ; GFX908-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8)
485  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
486  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
487  ; GFX908-NEXT: {{  $}}
488  ; GFX908-NEXT: bb.3:
489  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
490  ; GFX908-NEXT:   SI_RETURN
491  call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
492  ret void
493}
494
; Typed (tbuffer) store of <3 x float> with %soffset passed as a function
; argument. The expected MIR packs $vgpr0..$vgpr2 into a vreg_96 via
; REG_SEQUENCE, then wraps TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact in a waterfall
; loop (bb.1/bb.2) that reads the value copied from $vgpr4 (the last argument,
; presumably %soffset) with V_READFIRSTLANE_B32; bb.3 restores the saved $exec
; before returning.
495define void @llvm_amdgcn_raw_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
496  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v3f32
497  ; GFX908: bb.0 (%ir-block.0):
498  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
499  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
500  ; GFX908-NEXT: {{  $}}
501  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
502  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
503  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
504  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
505  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
506  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
507  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
508  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
509  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
510  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
511  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
512  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
513  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
514  ; GFX908-NEXT: {{  $}}
515  ; GFX908-NEXT: bb.1:
516  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
517  ; GFX908-NEXT: {{  $}}
518  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
519  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
520  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
521  ; GFX908-NEXT: {{  $}}
522  ; GFX908-NEXT: bb.2:
523  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
524  ; GFX908-NEXT: {{  $}}
525  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8)
526  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
527  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
528  ; GFX908-NEXT: {{  $}}
529  ; GFX908-NEXT: bb.3:
530  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
531  ; GFX908-NEXT:   SI_RETURN
532  call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
533  ret void
534}
535
; Raw buffer store of <4 x float> using the <4 x i32> resource form (poison rsrc).
; The checks expect the value copied from $vgpr5 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and BUFFER_STORE_DWORDX4_OFFEN_exact in bb.2 to consume
; that SGPR. $exec is saved in bb.0 and restored in bb.3.
536define void @llvm_amdgcn_raw_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
537  ; GFX908-LABEL: name: llvm_amdgcn_raw_buffer_store_v4f32
538  ; GFX908: bb.0 (%ir-block.0):
539  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
540  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
541  ; GFX908-NEXT: {{  $}}
542  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
543  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
544  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
545  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
546  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
547  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
548  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
549  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
550  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
551  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
552  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
553  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
554  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
555  ; GFX908-NEXT:   [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
556  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
557  ; GFX908-NEXT: {{  $}}
558  ; GFX908-NEXT: bb.1:
559  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
560  ; GFX908-NEXT: {{  $}}
561  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
562  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
563  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
564  ; GFX908-NEXT: {{  $}}
565  ; GFX908-NEXT: bb.2:
566  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
567  ; GFX908-NEXT: {{  $}}
568  ; GFX908-NEXT:   BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8)
569  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
570  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
571  ; GFX908-NEXT: {{  $}}
572  ; GFX908-NEXT: bb.3:
573  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
574  ; GFX908-NEXT:   SI_RETURN
575  call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0)
576  ret void
577}
578
; Raw tbuffer store of <4 x float> using the <4 x i32> resource form (poison rsrc).
; The checks expect the value copied from $vgpr5 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact in bb.2 to
; consume that SGPR. $exec is saved in bb.0 and restored in bb.3.
579define void @llvm_amdgcn_raw_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
580  ; GFX908-LABEL: name: llvm_amdgcn_raw_tbuffer_store_v4f32
581  ; GFX908: bb.0 (%ir-block.0):
582  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
583  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
584  ; GFX908-NEXT: {{  $}}
585  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
586  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
587  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
588  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
589  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
590  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
591  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
592  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
593  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
594  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
595  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
596  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
597  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
598  ; GFX908-NEXT:   [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
599  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
600  ; GFX908-NEXT: {{  $}}
601  ; GFX908-NEXT: bb.1:
602  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
603  ; GFX908-NEXT: {{  $}}
604  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
605  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
606  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
607  ; GFX908-NEXT: {{  $}}
608  ; GFX908-NEXT: bb.2:
609  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
610  ; GFX908-NEXT: {{  $}}
611  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8)
612  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
613  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
614  ; GFX908-NEXT: {{  $}}
615  ; GFX908-NEXT: bb.3:
616  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
617  ; GFX908-NEXT:   SI_RETURN
618  call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
619  ret void
620}
621
622;; Newer intrinsics that take addrspace(8) pointers
623
; Raw buffer load of float using the ptr addrspace(8) resource form (poison rsrc).
; The checks expect the value copied from $vgpr1 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and BUFFER_LOAD_DWORD_OFFEN in bb.2 to consume that SGPR.
; The loaded value is returned in $vgpr0 after $exec is restored in bb.3.
624define float @llvm_amdgcn_raw_ptr_buffer_load_f32(i32 %voffset, i32 %soffset) {
625  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_f32
626  ; GFX908: bb.0 (%ir-block.0):
627  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
628  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
629  ; GFX908-NEXT: {{  $}}
630  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
631  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
632  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
633  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
634  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
635  ; GFX908-NEXT: {{  $}}
636  ; GFX908-NEXT: bb.1:
637  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
638  ; GFX908-NEXT: {{  $}}
639  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
640  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
641  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
642  ; GFX908-NEXT: {{  $}}
643  ; GFX908-NEXT: bb.2:
644  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
645  ; GFX908-NEXT: {{  $}}
646  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8)
647  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
648  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
649  ; GFX908-NEXT: {{  $}}
650  ; GFX908-NEXT: bb.3:
651  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
652  ; GFX908-NEXT:   $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
653  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0
654  %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
655  ret float %val
656}
657
; Raw tbuffer load of float using the ptr addrspace(8) resource form (poison rsrc).
; The checks expect the value copied from $vgpr1 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and TBUFFER_LOAD_FORMAT_X_OFFEN in bb.2 to consume that
; SGPR. The loaded value is returned in $vgpr0 after $exec is restored in bb.3.
658define float @llvm_amdgcn_raw_ptr_tbuffer_load_f32(i32 %voffset, i32 %soffset) {
659  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_f32
660  ; GFX908: bb.0 (%ir-block.0):
661  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
662  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
663  ; GFX908-NEXT: {{  $}}
664  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
665  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
666  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
667  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
668  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
669  ; GFX908-NEXT: {{  $}}
670  ; GFX908-NEXT: bb.1:
671  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
672  ; GFX908-NEXT: {{  $}}
673  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
674  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
675  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
676  ; GFX908-NEXT: {{  $}}
677  ; GFX908-NEXT: bb.2:
678  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
679  ; GFX908-NEXT: {{  $}}
680  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from `ptr addrspace(8) poison`, align 1, addrspace 8)
681  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
682  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
683  ; GFX908-NEXT: {{  $}}
684  ; GFX908-NEXT: bb.3:
685  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
686  ; GFX908-NEXT:   $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
687  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0
688  %val = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
689  ret float %val
690}
691
; Raw buffer load of <2 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and BUFFER_LOAD_DWORDX2_OFFEN in bb.2 to consume
; that SGPR. The sub0/sub1 halves of the result are returned in $vgpr0/$vgpr1.
692define <2 x float> @llvm_amdgcn_raw_ptr_buffer_load_v2f32(i32 %voffset, i32 %soffset) {
693  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v2f32
694  ; GFX908: bb.0 (%ir-block.0):
695  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
696  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
697  ; GFX908-NEXT: {{  $}}
698  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
699  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
700  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
701  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
702  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
703  ; GFX908-NEXT: {{  $}}
704  ; GFX908-NEXT: bb.1:
705  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
706  ; GFX908-NEXT: {{  $}}
707  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
708  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
709  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
710  ; GFX908-NEXT: {{  $}}
711  ; GFX908-NEXT: bb.2:
712  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
713  ; GFX908-NEXT: {{  $}}
714  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8)
715  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
716  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
717  ; GFX908-NEXT: {{  $}}
718  ; GFX908-NEXT: bb.3:
719  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
720  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
721  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
722  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
723  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
724  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
725  %val = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
726  ret <2 x float> %val
727}
728
; Raw tbuffer load of <2 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and TBUFFER_LOAD_FORMAT_XY_OFFEN in bb.2 to consume
; that SGPR. The sub0/sub1 halves of the result are returned in $vgpr0/$vgpr1.
729define <2 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v2f32(i32 %voffset, i32 %soffset) {
730  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v2f32
731  ; GFX908: bb.0 (%ir-block.0):
732  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
733  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
734  ; GFX908-NEXT: {{  $}}
735  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
736  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
737  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
738  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
739  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
740  ; GFX908-NEXT: {{  $}}
741  ; GFX908-NEXT: bb.1:
742  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
743  ; GFX908-NEXT: {{  $}}
744  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
745  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
746  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
747  ; GFX908-NEXT: {{  $}}
748  ; GFX908-NEXT: bb.2:
749  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
750  ; GFX908-NEXT: {{  $}}
751  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64) from `ptr addrspace(8) poison`, align 1, addrspace 8)
752  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
753  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
754  ; GFX908-NEXT: {{  $}}
755  ; GFX908-NEXT: bb.3:
756  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
757  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
758  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
759  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
760  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
761  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1
762  %val = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
763  ret <2 x float> %val
764}
765
; Raw buffer load of <3 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and BUFFER_LOAD_DWORDX3_OFFEN in bb.2 to consume
; that SGPR. The sub0..sub2 parts of the result are returned in $vgpr0..$vgpr2.
766define <3 x float> @llvm_amdgcn_raw_ptr_buffer_load_v3f32(i32 %voffset, i32 %soffset) {
767  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v3f32
768  ; GFX908: bb.0 (%ir-block.0):
769  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
770  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
771  ; GFX908-NEXT: {{  $}}
772  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
773  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
774  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
775  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
776  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
777  ; GFX908-NEXT: {{  $}}
778  ; GFX908-NEXT: bb.1:
779  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
780  ; GFX908-NEXT: {{  $}}
781  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
782  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
783  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
784  ; GFX908-NEXT: {{  $}}
785  ; GFX908-NEXT: bb.2:
786  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
787  ; GFX908-NEXT: {{  $}}
788  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8)
789  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
790  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
791  ; GFX908-NEXT: {{  $}}
792  ; GFX908-NEXT: bb.3:
793  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
794  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
795  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
796  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
797  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
798  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
799  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
800  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
801  %val = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
802  ret <3 x float> %val
803}
804
; Raw tbuffer load of <3 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and TBUFFER_LOAD_FORMAT_XYZ_OFFEN in bb.2 to consume
; that SGPR. The sub0..sub2 parts of the result are returned in $vgpr0..$vgpr2.
805define <3 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v3f32(i32 %voffset, i32 %soffset) {
806  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v3f32
807  ; GFX908: bb.0 (%ir-block.0):
808  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
809  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
810  ; GFX908-NEXT: {{  $}}
811  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
812  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
813  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
814  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
815  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
816  ; GFX908-NEXT: {{  $}}
817  ; GFX908-NEXT: bb.1:
818  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
819  ; GFX908-NEXT: {{  $}}
820  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
821  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
822  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
823  ; GFX908-NEXT: {{  $}}
824  ; GFX908-NEXT: bb.2:
825  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
826  ; GFX908-NEXT: {{  $}}
827  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96) from `ptr addrspace(8) poison`, align 1, addrspace 8)
828  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
829  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
830  ; GFX908-NEXT: {{  $}}
831  ; GFX908-NEXT: bb.3:
832  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
833  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
834  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
835  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
836  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
837  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
838  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
839  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
840  %val = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
841  ret <3 x float> %val
842}
843
; Raw buffer load of <4 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and BUFFER_LOAD_DWORDX4_OFFEN in bb.2 to consume
; that SGPR. The sub0..sub3 parts of the result are returned in $vgpr0..$vgpr3.
844define <4 x float> @llvm_amdgcn_raw_ptr_buffer_load_v4f32(i32 %voffset, i32 %soffset) {
845  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_load_v4f32
846  ; GFX908: bb.0 (%ir-block.0):
847  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
848  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
849  ; GFX908-NEXT: {{  $}}
850  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
851  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
852  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
853  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
854  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
855  ; GFX908-NEXT: {{  $}}
856  ; GFX908-NEXT: bb.1:
857  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
858  ; GFX908-NEXT: {{  $}}
859  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
860  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
861  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
862  ; GFX908-NEXT: {{  $}}
863  ; GFX908-NEXT: bb.2:
864  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
865  ; GFX908-NEXT: {{  $}}
866  ; GFX908-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
867  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
868  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
869  ; GFX908-NEXT: {{  $}}
870  ; GFX908-NEXT: bb.3:
871  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
872  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
873  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
874  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
875  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3
876  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
877  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
878  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
879  ; GFX908-NEXT:   $vgpr3 = COPY [[COPY5]]
880  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
881  %val = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
882  ret <4 x float> %val
883}
884
; Raw tbuffer load of <4 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr1 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and TBUFFER_LOAD_FORMAT_XYZW_OFFEN in bb.2 to consume
; that SGPR. The sub0..sub3 parts of the result are returned in $vgpr0..$vgpr3.
885define <4 x float> @llvm_amdgcn_raw_ptr_tbuffer_load_v4f32(i32 %voffset, i32 %soffset) {
886  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_load_v4f32
887  ; GFX908: bb.0 (%ir-block.0):
888  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
889  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1
890  ; GFX908-NEXT: {{  $}}
891  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
892  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
893  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
894  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
895  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
896  ; GFX908-NEXT: {{  $}}
897  ; GFX908-NEXT: bb.1:
898  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
899  ; GFX908-NEXT: {{  $}}
900  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
901  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
902  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
903  ; GFX908-NEXT: {{  $}}
904  ; GFX908-NEXT: bb.2:
905  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
906  ; GFX908-NEXT: {{  $}}
907  ; GFX908-NEXT:   [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from `ptr addrspace(8) poison`, align 1, addrspace 8)
908  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
909  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
910  ; GFX908-NEXT: {{  $}}
911  ; GFX908-NEXT: bb.3:
912  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
913  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
914  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
915  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
916  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3
917  ; GFX908-NEXT:   $vgpr0 = COPY [[COPY2]]
918  ; GFX908-NEXT:   $vgpr1 = COPY [[COPY3]]
919  ; GFX908-NEXT:   $vgpr2 = COPY [[COPY4]]
920  ; GFX908-NEXT:   $vgpr3 = COPY [[COPY5]]
921  ; GFX908-NEXT:   SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
922  %val = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
923  ret <4 x float> %val
924}
925
; Raw buffer store of float using the ptr addrspace(8) resource form (poison rsrc).
; The checks expect the value copied from $vgpr2 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and BUFFER_STORE_DWORD_OFFEN_exact in bb.2 to consume that
; SGPR. $exec is saved in bb.0 and restored in bb.3.
926define void @llvm_amdgcn_raw_ptr_buffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
927  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_f32
928  ; GFX908: bb.0 (%ir-block.0):
929  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
930  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
931  ; GFX908-NEXT: {{  $}}
932  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
933  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
934  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
935  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
936  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
937  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
938  ; GFX908-NEXT: {{  $}}
939  ; GFX908-NEXT: bb.1:
940  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
941  ; GFX908-NEXT: {{  $}}
942  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
943  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
944  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
945  ; GFX908-NEXT: {{  $}}
946  ; GFX908-NEXT: bb.2:
947  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
948  ; GFX908-NEXT: {{  $}}
949  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8)
950  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
951  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
952  ; GFX908-NEXT: {{  $}}
953  ; GFX908-NEXT: bb.3:
954  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
955  ; GFX908-NEXT:   SI_RETURN
956  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
957  ret void
958}
959
; Raw tbuffer store of float using the ptr addrspace(8) resource form (poison rsrc).
; The checks expect the value copied from $vgpr2 (presumably %soffset, the last
; argument) to be read into an SGPR with V_READFIRSTLANE_B32 inside a
; SI_WATERFALL_LOOP, and TBUFFER_STORE_FORMAT_X_OFFEN_exact in bb.2 to consume
; that SGPR. $exec is saved in bb.0 and restored in bb.3.
960define void @llvm_amdgcn_raw_ptr_tbuffer_store_f32(float %val, i32 %voffset, i32 %soffset) {
961  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_f32
962  ; GFX908: bb.0 (%ir-block.0):
963  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
964  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
965  ; GFX908-NEXT: {{  $}}
966  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr2
967  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
968  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
969  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
970  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
971  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
972  ; GFX908-NEXT: {{  $}}
973  ; GFX908-NEXT: bb.1:
974  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
975  ; GFX908-NEXT: {{  $}}
976  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
977  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
978  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
979  ; GFX908-NEXT: {{  $}}
980  ; GFX908-NEXT: bb.2:
981  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
982  ; GFX908-NEXT: {{  $}}
983  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY2]], [[COPY1]], [[DEF]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into `ptr addrspace(8) poison`, align 1, addrspace 8)
984  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
985  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
986  ; GFX908-NEXT: {{  $}}
987  ; GFX908-NEXT: bb.3:
988  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
989  ; GFX908-NEXT:   SI_RETURN
990  call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
991  ret void
992}
993
; Raw buffer store of <2 x float> using the ptr addrspace(8) resource form
; (poison rsrc). The checks expect the value copied from $vgpr3 (presumably
; %soffset, the last argument) to be read into an SGPR with V_READFIRSTLANE_B32
; inside a SI_WATERFALL_LOOP, and BUFFER_STORE_DWORDX2_OFFEN_exact in bb.2 to
; consume that SGPR. $exec is saved in bb.0 and restored in bb.3.
994define void @llvm_amdgcn_raw_ptr_buffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
995  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v2f32
996  ; GFX908: bb.0 (%ir-block.0):
997  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
998  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
999  ; GFX908-NEXT: {{  $}}
1000  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1001  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1002  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1003  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1004  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1005  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1006  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
1007  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
1008  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1009  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1010  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1011  ; GFX908-NEXT: {{  $}}
1012  ; GFX908-NEXT: bb.1:
1013  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1014  ; GFX908-NEXT: {{  $}}
1015  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1016  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1017  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1018  ; GFX908-NEXT: {{  $}}
1019  ; GFX908-NEXT: bb.2:
1020  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1021  ; GFX908-NEXT: {{  $}}
1022  ; GFX908-NEXT:   BUFFER_STORE_DWORDX2_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1023  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1024  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1025  ; GFX908-NEXT: {{  $}}
1026  ; GFX908-NEXT: bb.3:
1027  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1028  ; GFX908-NEXT:   SI_RETURN
1029  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
1030  ret void
1031}
1032
;; Stores a <2 x float> through @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32 with a
;; poison resource pointer, passing %voffset and %soffset straight from the
;; function arguments followed by two trailing i32 0 operands.
;; The checks expect the store to sit inside a loop (bb.1/bb.2, closed by
;; SI_WATERFALL_LOOP): the value copied from $vgpr3 is read into an SGPR with
;; V_READFIRSTLANE_B32, compared against the original VGPR with V_CMP_EQ_U32,
;; and the compare result is fed to S_AND_SAVEEXEC_B64. The SGPR is the operand
;; TBUFFER_STORE_FORMAT_XY_OFFEN_exact consumes; $vgpr0/$vgpr1 form the 64-bit
;; data register and $vgpr2 is passed through unchanged as a VGPR operand.
;; bb.3 restores $exec from the copy taken in bb.0.
1033define void @llvm_amdgcn_raw_ptr_tbuffer_store_v2f32(<2 x float> %val, i32 %voffset, i32 %soffset) {
1034  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v2f32
1035  ; GFX908: bb.0 (%ir-block.0):
1036  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
1037  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
1038  ; GFX908-NEXT: {{  $}}
1039  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1040  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1041  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1042  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1043  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1044  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1045  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
1046  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
1047  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1048  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1049  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1050  ; GFX908-NEXT: {{  $}}
1051  ; GFX908-NEXT: bb.1:
1052  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1053  ; GFX908-NEXT: {{  $}}
1054  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1055  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1056  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1057  ; GFX908-NEXT: {{  $}}
1058  ; GFX908-NEXT: bb.2:
1059  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1060  ; GFX908-NEXT: {{  $}}
1061  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY4]], [[COPY1]], [[DEF2]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s64) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1062  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1063  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1064  ; GFX908-NEXT: {{  $}}
1065  ; GFX908-NEXT: bb.3:
1066  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1067  ; GFX908-NEXT:   SI_RETURN
1068  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
1069  ret void
1070}
1071
;; Stores a <3 x float> through @llvm.amdgcn.raw.ptr.buffer.store.v3f32 with a
;; poison resource pointer, passing %voffset and %soffset straight from the
;; function arguments and a trailing i32 0.
;; The checks expect the store to sit inside a loop (bb.1/bb.2, closed by
;; SI_WATERFALL_LOOP): the value copied from $vgpr4 is read into an SGPR with
;; V_READFIRSTLANE_B32, compared against the original VGPR with V_CMP_EQ_U32,
;; and the compare result is fed to S_AND_SAVEEXEC_B64. The SGPR is the operand
;; BUFFER_STORE_DWORDX3_OFFEN_exact consumes; $vgpr0-$vgpr2 form the 96-bit
;; data register and $vgpr3 is passed through unchanged as a VGPR operand.
;; bb.3 restores $exec from the copy taken in bb.0.
1072define void @llvm_amdgcn_raw_ptr_buffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
1073  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v3f32
1074  ; GFX908: bb.0 (%ir-block.0):
1075  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
1076  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1077  ; GFX908-NEXT: {{  $}}
1078  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1079  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1080  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1081  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1082  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1083  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1084  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1085  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1086  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
1087  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
1088  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1089  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1090  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1091  ; GFX908-NEXT: {{  $}}
1092  ; GFX908-NEXT: bb.1:
1093  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1094  ; GFX908-NEXT: {{  $}}
1095  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1096  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1097  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1098  ; GFX908-NEXT: {{  $}}
1099  ; GFX908-NEXT: bb.2:
1100  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1101  ; GFX908-NEXT: {{  $}}
1102  ; GFX908-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1103  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1104  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1105  ; GFX908-NEXT: {{  $}}
1106  ; GFX908-NEXT: bb.3:
1107  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1108  ; GFX908-NEXT:   SI_RETURN
1109  call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
1110  ret void
1111}
1112
;; Stores a <3 x float> through @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32 with a
;; poison resource pointer, passing %voffset and %soffset straight from the
;; function arguments followed by two trailing i32 0 operands.
;; The checks expect the store to sit inside a loop (bb.1/bb.2, closed by
;; SI_WATERFALL_LOOP): the value copied from $vgpr4 is read into an SGPR with
;; V_READFIRSTLANE_B32, compared against the original VGPR with V_CMP_EQ_U32,
;; and the compare result is fed to S_AND_SAVEEXEC_B64. The SGPR is the operand
;; TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact consumes; $vgpr0-$vgpr2 form the 96-bit
;; data register and $vgpr3 is passed through unchanged as a VGPR operand.
;; bb.3 restores $exec from the copy taken in bb.0.
1113define void @llvm_amdgcn_raw_ptr_tbuffer_store_v3f32(<3 x float> %val, i32 %voffset, i32 %soffset) {
1114  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v3f32
1115  ; GFX908: bb.0 (%ir-block.0):
1116  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
1117  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
1118  ; GFX908-NEXT: {{  $}}
1119  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1120  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1121  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1122  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1123  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1124  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1125  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1126  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1127  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2
1128  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE]]
1129  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1130  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1131  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1132  ; GFX908-NEXT: {{  $}}
1133  ; GFX908-NEXT: bb.1:
1134  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1135  ; GFX908-NEXT: {{  $}}
1136  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1137  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1138  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1139  ; GFX908-NEXT: {{  $}}
1140  ; GFX908-NEXT: bb.2:
1141  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1142  ; GFX908-NEXT: {{  $}}
1143  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY5]], [[COPY1]], [[DEF3]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1144  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1145  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1146  ; GFX908-NEXT: {{  $}}
1147  ; GFX908-NEXT: bb.3:
1148  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1149  ; GFX908-NEXT:   SI_RETURN
1150  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
1151  ret void
1152}
1153
;; Stores a <4 x float> through @llvm.amdgcn.raw.ptr.buffer.store.v4f32 with a
;; poison resource pointer, passing %voffset and %soffset straight from the
;; function arguments and a trailing i32 0.
;; The checks expect the store to sit inside a loop (bb.1/bb.2, closed by
;; SI_WATERFALL_LOOP): the value copied from $vgpr5 is read into an SGPR with
;; V_READFIRSTLANE_B32, compared against the original VGPR with V_CMP_EQ_U32,
;; and the compare result is fed to S_AND_SAVEEXEC_B64. The SGPR is the operand
;; BUFFER_STORE_DWORDX4_OFFEN_exact consumes; $vgpr0-$vgpr3 form the 128-bit
;; data register and $vgpr4 is passed through unchanged as a VGPR operand.
;; bb.3 restores $exec from the copy taken in bb.0.
1154define void @llvm_amdgcn_raw_ptr_buffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
1155  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_buffer_store_v4f32
1156  ; GFX908: bb.0 (%ir-block.0):
1157  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
1158  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1159  ; GFX908-NEXT: {{  $}}
1160  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1161  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1162  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1163  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1164  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1165  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1166  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1167  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1168  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1169  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1170  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1171  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
1172  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1173  ; GFX908-NEXT:   [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1174  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1175  ; GFX908-NEXT: {{  $}}
1176  ; GFX908-NEXT: bb.1:
1177  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1178  ; GFX908-NEXT: {{  $}}
1179  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1180  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1181  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1182  ; GFX908-NEXT: {{  $}}
1183  ; GFX908-NEXT: bb.2:
1184  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1185  ; GFX908-NEXT: {{  $}}
1186  ; GFX908-NEXT:   BUFFER_STORE_DWORDX4_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1187  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1188  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1189  ; GFX908-NEXT: {{  $}}
1190  ; GFX908-NEXT: bb.3:
1191  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1192  ; GFX908-NEXT:   SI_RETURN
1193  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0)
1194  ret void
1195}
1196
;; Stores a <4 x float> through @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32 with a
;; poison resource pointer, passing %voffset and %soffset straight from the
;; function arguments followed by two trailing i32 0 operands.
;; The checks expect the store to sit inside a loop (bb.1/bb.2, closed by
;; SI_WATERFALL_LOOP): the value copied from $vgpr5 is read into an SGPR with
;; V_READFIRSTLANE_B32, compared against the original VGPR with V_CMP_EQ_U32,
;; and the compare result is fed to S_AND_SAVEEXEC_B64. The SGPR is the operand
;; TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact consumes; $vgpr0-$vgpr3 form the
;; 128-bit data register and $vgpr4 is passed through unchanged as a VGPR
;; operand. bb.3 restores $exec from the copy taken in bb.0.
1197define void @llvm_amdgcn_raw_ptr_tbuffer_store_v4f32(<4 x float> %val, i32 %voffset, i32 %soffset) {
1198  ; GFX908-LABEL: name: llvm_amdgcn_raw_ptr_tbuffer_store_v4f32
1199  ; GFX908: bb.0 (%ir-block.0):
1200  ; GFX908-NEXT:   successors: %bb.1(0x80000000)
1201  ; GFX908-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1202  ; GFX908-NEXT: {{  $}}
1203  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1204  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1205  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1206  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1207  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1208  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1209  ; GFX908-NEXT:   [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1210  ; GFX908-NEXT:   [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1211  ; GFX908-NEXT:   [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1212  ; GFX908-NEXT:   [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1213  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1214  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]]
1215  ; GFX908-NEXT:   [[DEF4:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
1216  ; GFX908-NEXT:   [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1217  ; GFX908-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1218  ; GFX908-NEXT: {{  $}}
1219  ; GFX908-NEXT: bb.1:
1220  ; GFX908-NEXT:   successors: %bb.2(0x80000000)
1221  ; GFX908-NEXT: {{  $}}
1222  ; GFX908-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1223  ; GFX908-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY]], implicit $exec
1224  ; GFX908-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1225  ; GFX908-NEXT: {{  $}}
1226  ; GFX908-NEXT: bb.2:
1227  ; GFX908-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
1228  ; GFX908-NEXT: {{  $}}
1229  ; GFX908-NEXT:   TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY6]], [[COPY1]], [[DEF4]], killed [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into `ptr addrspace(8) poison`, align 1, addrspace 8)
1230  ; GFX908-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1231  ; GFX908-NEXT:   SI_WATERFALL_LOOP %bb.1, implicit $exec
1232  ; GFX908-NEXT: {{  $}}
1233  ; GFX908-NEXT: bb.3:
1234  ; GFX908-NEXT:   $exec = S_MOV_B64 [[S_MOV_B64_]]
1235  ; GFX908-NEXT:   SI_RETURN
1236  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
1237  ret void
1238}
1239
;; Declarations of the raw buffer/tbuffer load and store intrinsics whose
;; resource operand is a <4 x i32>, for float, <2 x float>, <3 x float> and
;; <4 x float> data. Each tbuffer form takes one more trailing i32 than the
;; matching buffer form; stores take the data value as their first operand.
1240declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 )
1241declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32)
1242declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32)
1243declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32)
1244declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32)
1245declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32)
1246declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32)
1247declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
1248declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32)
1249declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32, i32)
1250declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32)
1251declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32)
1252declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32)
1253declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32)
1254declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32)
1255declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32)
1256
;; Declarations of the raw.ptr buffer/tbuffer load and store intrinsics, whose
;; resource operand is a nocapture `ptr addrspace(8)`, for float, <2 x float>,
;; <3 x float> and <4 x float> data. Each tbuffer form takes one more trailing
;; i32 than the matching buffer form; stores take the data value as their
;; first operand.
1257declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32 )
1258declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1259declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32)
1260declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1261declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32)
1262declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1263declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32)
1264declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
1265declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32)
1266declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1267declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1268declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1269declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1270declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1271declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
1272declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
1273