xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fold-fi-mubuf.mir (revision 1b560e6ab7fc16c915bb054d3bffe5f77e08c3d5)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination  %s -o - | FileCheck -check-prefix=GCN %s
3
4# Kernels (entry functions) can have no frame pointer (FP).
# Entry function (isEntryFunction: true). The BUFFER_LOAD_DWORD_IDXEN takes
# its rsrc operand from a COPY of $sgpr12_sgpr13_sgpr14_sgpr15, which is not
# the scratchRSrcReg declared below ($sgpr96_sgpr97_sgpr98_sgpr99), and its
# soffset operand from a virtual register defined by S_MOV_B32 0.
# The GCN checks expect %stack.0 to stay in its V_MOV_B32_e32 (the frame index
# is not folded into the load); only the soffset register is replaced by the
# immediate 0, and the S_MOV_B32 no longer appears in the output.
5---
6name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
7tracksRegLiveness: true
8frameInfo:
9  maxAlignment:    4
10  localFrameSize:  4
11stack:
12  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
13machineFunctionInfo:
14  isEntryFunction: true
15  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
16  stackPtrOffsetReg: '$sgpr32'
17body:             |
18  bb.0:
19    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
20
21    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
22    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
23    ; GCN-NEXT: {{  $}}
24    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
25    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
26    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
27    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
28    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
29    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
30    %1:sreg_32_xm0 = S_MOV_B32 0
31    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
32    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
33    $vgpr0 = COPY %3
34    SI_RETURN_TO_EPILOG $vgpr0
35
36...
37
# Entry function. The BUFFER_LOAD_DWORD_IDXEN takes its rsrc operand from a
# COPY of $sgpr12_sgpr13_sgpr14_sgpr15 rather than from the declared
# scratchRSrcReg ($sgpr96_sgpr97_sgpr98_sgpr99); the soffset operand is
# already the immediate 0.
# The GCN checks expect the instructions to come out unchanged: %stack.0 stays
# in its V_MOV_B32_e32 and is not folded into the load.
38---
39name: kernel_no_fold_fi_non_stack_rsrc
40tracksRegLiveness: true
41frameInfo:
42  maxAlignment:    4
43  localFrameSize:  4
44stack:
45  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
46machineFunctionInfo:
47  isEntryFunction: true
48  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
49  stackPtrOffsetReg: '$sgpr32'
50body:             |
51  bb.0:
52    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
53
54    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
55    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
56    ; GCN-NEXT: {{  $}}
57    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
58    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
59    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
60    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
61    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
62    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
63    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
64    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
65    $vgpr0 = COPY %3
66    SI_RETURN_TO_EPILOG $vgpr0
67
68...
69
# Entry function. The store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc, but their soffset operand is a virtual
# register defined by S_MOV_B32 0 instead of an immediate.
# The GCN checks expect %stack.0 to remain in its V_MOV_B32_e32 (not folded
# into either memory instruction), while the soffset register is replaced by
# the immediate 0 and the S_MOV_B32 no longer appears.
70---
71name: kernel_no_fold_fi_non_stack_soffset
72tracksRegLiveness: true
73frameInfo:
74  maxAlignment:    4
75  localFrameSize:  4
76stack:
77  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
78machineFunctionInfo:
79  isEntryFunction: true
80  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
81  stackPtrOffsetReg: '$sgpr32'
82body:             |
83  bb.0:
84
85    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
86    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
87    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
88    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
89    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
90    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
91    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
92    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
93    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
94    %2:sreg_32_xm0 = S_MOV_B32 0
95
96    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
97    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
98    $vgpr0 = COPY %3
99    S_ENDPGM 0, implicit $vgpr0
100
101...
102
# Entry function, positive case. The store and load use the declared
# scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and an immediate 0
# soffset, with the address supplied through V_MOV_B32_e32 %stack.0.
# The GCN checks expect %stack.0 to be folded directly into the address
# operand of both BUFFER_STORE_DWORD_OFFEN and BUFFER_LOAD_DWORD_OFFEN, with
# the V_MOV_B32_e32 of the frame index gone from the output.
103---
104name: kernel_fold_fi_mubuf
105tracksRegLiveness: true
106frameInfo:
107  maxAlignment:    4
108  localFrameSize:  4
109stack:
110  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
111machineFunctionInfo:
112  isEntryFunction: true
113  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
114  stackPtrOffsetReg: '$sgpr32'
115body:             |
116  bb.0:
117
118    ; GCN-LABEL: name: kernel_fold_fi_mubuf
119    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
120    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
121    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
122    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
123    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
124    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
125    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
126
127    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
128    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
129    $vgpr0 = COPY %2
130    S_ENDPGM 0, implicit $vgpr0
131
132...
133
134
135# Functions have an unswizzled SP/FP relative to the wave offset
# Non-entry function (isEntryFunction: false) with frameOffsetReg and
# stackPtrOffsetReg both set to $sgpr32. The BUFFER_LOAD_DWORD_IDXEN takes its
# rsrc operand from a COPY of $sgpr12_sgpr13_sgpr14_sgpr15, not the declared
# scratchRSrcReg ($sgpr96_sgpr97_sgpr98_sgpr99), and its soffset operand from
# a virtual register defined by S_MOV_B32 0.
# The GCN checks expect %stack.0 to stay in its V_MOV_B32_e32 (not folded);
# only the soffset register is replaced by the immediate 0, and the S_MOV_B32
# no longer appears.
136---
137name: function_no_fold_fi_non_stack_rsrc_and_soffset
138tracksRegLiveness: true
139frameInfo:
140  maxAlignment:    4
141  localFrameSize:  4
142stack:
143  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
144machineFunctionInfo:
145  isEntryFunction: false
146  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
147  frameOffsetReg:  '$sgpr32'
148  stackPtrOffsetReg: '$sgpr32'
149body:             |
150  bb.0:
151    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
152
153    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
154    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
155    ; GCN-NEXT: {{  $}}
156    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
157    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
158    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
159    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
160    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
161    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
162    %1:sreg_32_xm0 = S_MOV_B32 0
163    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
164    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
165    $vgpr0 = COPY %3
166    SI_RETURN_TO_EPILOG $vgpr0
167
168...
169
# Non-entry function with frameOffsetReg and stackPtrOffsetReg both set to
# $sgpr32. The BUFFER_LOAD_DWORD_IDXEN takes its rsrc operand from a COPY of
# $sgpr12_sgpr13_sgpr14_sgpr15 rather than from the declared scratchRSrcReg
# ($sgpr96_sgpr97_sgpr98_sgpr99); the soffset operand is already the
# immediate 0.
# The GCN checks expect the instructions to come out unchanged: %stack.0 stays
# in its V_MOV_B32_e32 and is not folded into the load.
170---
171name: function_no_fold_fi_non_stack_rsrc
172tracksRegLiveness: true
173frameInfo:
174  maxAlignment:    4
175  localFrameSize:  4
176stack:
177  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
178machineFunctionInfo:
179  isEntryFunction: false
180  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
181  frameOffsetReg:  '$sgpr32'
182  stackPtrOffsetReg: '$sgpr32'
183body:             |
184  bb.0:
185    liveins: $sgpr12_sgpr13_sgpr14_sgpr15
186
187    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
188    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
189    ; GCN-NEXT: {{  $}}
190    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
191    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
192    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
193    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
194    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
195    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
196    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
197    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
198    $vgpr0 = COPY %3
199    SI_RETURN_TO_EPILOG $vgpr0
200
201...
202
# Non-entry function with frameOffsetReg and stackPtrOffsetReg both set to
# $sgpr32. The store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and an immediate 0 soffset.
# The GCN checks expect %stack.0 to be folded into the address operand of
# both memory instructions, with the V_MOV_B32_e32 of the frame index removed.
# NOTE(review): the name says "no_fold" / "non_stack_soffset", but the input
# has no register soffset and the checks expect a fold; the body matches
# function_fold_fi_mubuf_wave_relative line for line. The negative case the
# name promises does not appear to be exercised here -- confirm whether the
# input was meant to carry a non-stack soffset register.
203---
204name: function_no_fold_fi_non_stack_soffset
205tracksRegLiveness: true
206frameInfo:
207  maxAlignment:    4
208  localFrameSize:  4
209stack:
210  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
211machineFunctionInfo:
212  isEntryFunction: false
213  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
214  frameOffsetReg:  '$sgpr32'
215  stackPtrOffsetReg: '$sgpr32'
216body:             |
217  bb.0:
218
219    ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
220    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
221    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
222    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
223    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
224    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
225    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
226    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
227
228    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
229    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
230    $vgpr0 = COPY %2
231    S_ENDPGM 0, implicit $vgpr0
232
233...
234
# Non-entry function, positive case. frameOffsetReg and stackPtrOffsetReg are
# both $sgpr32; the store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and an immediate 0 soffset.
# The GCN checks expect %stack.0 to be folded into the address operand of
# both BUFFER_STORE_DWORD_OFFEN and BUFFER_LOAD_DWORD_OFFEN, with the
# V_MOV_B32_e32 of the frame index removed.
# NOTE(review): apart from the name, this input is identical to
# function_fold_fi_mubuf_stack_relative; presumably "wave_relative" refers to
# the immediate-0 soffset -- confirm the two tests are meant to differ.
235---
236name: function_fold_fi_mubuf_wave_relative
237tracksRegLiveness: true
238frameInfo:
239  maxAlignment:    4
240  localFrameSize:  4
241stack:
242  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
243machineFunctionInfo:
244  isEntryFunction: false
245  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
246  frameOffsetReg:  '$sgpr32'
247  stackPtrOffsetReg: '$sgpr32'
248body:             |
249  bb.0:
250
251    ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
252    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
253    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
254    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
255    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
256    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
257    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
258    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
259
260    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
261    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
262    $vgpr0 = COPY %2
263    S_ENDPGM 0, implicit $vgpr0
264
265...
266
# Non-entry function, positive case. frameOffsetReg and stackPtrOffsetReg are
# both $sgpr32; the store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and an immediate 0 soffset.
# The GCN checks expect %stack.0 to be folded into the address operand of
# both BUFFER_STORE_DWORD_OFFEN and BUFFER_LOAD_DWORD_OFFEN, with the
# V_MOV_B32_e32 of the frame index removed.
# NOTE(review): the name says "stack_relative", yet the soffset operand is the
# immediate 0 rather than $sgpr32, and the input is identical to
# function_fold_fi_mubuf_wave_relative apart from the name -- confirm whether
# a stack-pointer soffset was intended here.
267---
268name: function_fold_fi_mubuf_stack_relative
269tracksRegLiveness: true
270frameInfo:
271  maxAlignment:    4
272  localFrameSize:  4
273stack:
274  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
275machineFunctionInfo:
276  isEntryFunction: false
277  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
278  frameOffsetReg:  '$sgpr32'
279  stackPtrOffsetReg: '$sgpr32'
280body:             |
281  bb.0:
282
283    ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
284    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
285    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
286    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
287    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
288    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
289    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
290    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
291
292    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
293    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
294    $vgpr0 = COPY %2
295    S_ENDPGM 0, implicit $vgpr0
296
297...
298