xref: /llvm-project/llvm/test/CodeGen/AMDGPU/sdwa-ops.mir (revision e1ee07d0ff7a37bf5f52d560a52925c0507471e1)
1# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
2# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
3# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
4# RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 %s
5
6# test for 3 consecutive _sdwa's
# The body repeats the same pattern three times: a V_AND_B32_e32 with the
# constant 255 feeds a V_ADD_CO_U32_e64, whose carry-out is consumed (killed)
# by a V_ADDC_U32_e64 with a dead carry-out, and the 64-bit result is stored.
# The checks require each V_ADD_CO_U32 to appear in _sdwa form immediately
# followed by its V_ADDC_U32_e64, and require the first pair to keep its
# nsw / nuw flags.
7# GFX9-LABEL: name:            test1_add_co_sdwa
8# GFX9: = nsw V_ADD_CO_U32_sdwa
9# GFX9-NEXT: = nuw V_ADDC_U32_e64
10# GFX9: V_ADD_CO_U32_sdwa
11# GFX9-NEXT: V_ADDC_U32_e64
12# GFX9: V_ADD_CO_U32_sdwa
13# GFX9-NEXT: V_ADDC_U32_e64
14---
15name:            test1_add_co_sdwa
16tracksRegLiveness: true
17registers:
18  - { id: 0, class: vgpr_32, preferred-register: '' }
19liveins:
20  - { reg: '$vgpr0', virtual-reg: '%0' }
21  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
22body:             |
23  bb.0:
24    liveins: $vgpr0, $sgpr0_sgpr1
25
26    %1:sgpr_64 = COPY $sgpr0_sgpr1
27    %0:vgpr_32 = COPY $vgpr0
28    %22:sreg_32_xm0 = S_MOV_B32 255
29    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
30    %30:vreg_64 = COPY $sgpr0_sgpr1
31    %63:vgpr_32, %65:sreg_64_xexec = nsw V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
32    %64:vgpr_32, dead %66:sreg_64_xexec = nuw V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
33    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
34    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
35
36    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
37    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
38    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
39    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
40    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
41
42    %171:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
43    %173:vgpr_32, %175:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %171, 0, implicit $exec
44    %174:vgpr_32, dead %176:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %175, 0, implicit $exec
45    %172:vreg_64 = REG_SEQUENCE %173, %subreg.sub0, %174, %subreg.sub1
46    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %172, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
47
48...
49
50# test for VCC interference on sdwa, should generate 1 xform only
# An outer V_ADD_CO_U32_e64 / V_ADDC_U32_e64 pair (%63 / %64) encloses a
# complete inner pair (%163 / %164), so the inner pair sits between the def of
# the outer carry-out %65 and its use. The checks expect one V_ADD_CO_U32_sdwa
# followed by a V_ADDC_U32_e64, with no further V_ADD_CO_U32_sdwa and no
# V_ADDC_U32_e32 after them.
# Every virtual register is defined exactly once so the input stays in SSA form.
51# GFX9-LABEL: name:            test2_add_co_sdwa
52# GFX9: V_ADD_CO_U32_sdwa
53# GFX9: V_ADDC_U32_e64
54# GFX9-NOT: V_ADD_CO_U32_sdwa
55# GFX9-NOT: V_ADDC_U32_e32
56---
57name:            test2_add_co_sdwa
58tracksRegLiveness: true
59registers:
60  - { id: 0, class: vgpr_32, preferred-register: '' }
61liveins:
62  - { reg: '$vgpr0', virtual-reg: '%0' }
63  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
64body:             |
65  bb.0:
66    liveins: $vgpr0, $sgpr0_sgpr1
67
68    %1:sgpr_64 = COPY $sgpr0_sgpr1
69    %0:vgpr_32 = COPY $vgpr0
70    %22:sreg_32_xm0 = S_MOV_B32 255
71    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
72    %30:vreg_64 = COPY $sgpr0_sgpr1
73    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
74
75    %161:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
76    %163:vgpr_32, %165:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %161, 0, implicit $exec
77    %164:vgpr_32, dead %166:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %165, 0, implicit $exec
78    %162:vreg_64 = REG_SEQUENCE %163, %subreg.sub0, %164, %subreg.sub1
    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %162, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
79
80    %64:vgpr_32, dead %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
81    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
82    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
83
90...
91
92# test for CarryOut used, should reject
# The carry-out %66 of the V_ADDC_U32_e64 is not dead: the REG_SEQUENCE reads
# it. The checks expect both instructions to stay in their _e64 form, with no
# V_ADD_CO_U32_sdwa and no V_ADDC_U32_e32 after them.
93# GFX9-LABEL: name:            test3_add_co_sdwa
94# GFX9: V_ADD_CO_U32_e64
95# GFX9: V_ADDC_U32_e64
96# GFX9-NOT: V_ADD_CO_U32_sdwa
97# GFX9-NOT: V_ADDC_U32_e32
98---
99name:            test3_add_co_sdwa
100tracksRegLiveness: true
101registers:
102  - { id: 0, class: vgpr_32, preferred-register: '' }
103liveins:
104  - { reg: '$vgpr0', virtual-reg: '%0' }
105  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
106body:             |
107  bb.0:
108    liveins: $vgpr0, $sgpr0_sgpr1
109
110    %1:sgpr_64 = COPY $sgpr0_sgpr1
111    %0:vgpr_32 = COPY $vgpr0
112    %22:sreg_32_xm0 = S_MOV_B32 255
113    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
114    %30:vreg_64 = COPY $sgpr0_sgpr1
115    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
116    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, killed %65, 0, implicit $exec
117    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %66, %subreg.sub1
118    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
119
120...
121
122# test for CarryIn used more than once, should reject
# The carry-out %65 of the V_ADD_CO_U32_e64 has two readers: the
# V_ADDC_U32_e64 (as carry-in) and the REG_SEQUENCE. The checks expect both
# instructions to stay in their _e64 form, with no V_ADD_CO_U32_sdwa and no
# V_ADDC_U32_e32 after them.
123# GFX9-LABEL: name:            test4_add_co_sdwa
124# GFX9: V_ADD_CO_U32_e64
125# GFX9: V_ADDC_U32_e64
126# GFX9-NOT: V_ADD_CO_U32_sdwa
127# GFX9-NOT: V_ADDC_U32_e32
128---
129name:            test4_add_co_sdwa
130tracksRegLiveness: true
131registers:
132  - { id: 0, class: vgpr_32, preferred-register: '' }
133liveins:
134  - { reg: '$vgpr0', virtual-reg: '%0' }
135  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
136body:             |
137  bb.0:
138    liveins: $vgpr0, $sgpr0_sgpr1
139
140    %1:sgpr_64 = COPY $sgpr0_sgpr1
141    %0:vgpr_32 = COPY $vgpr0
142    %22:sreg_32_xm0 = S_MOV_B32 255
143    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
144    %30:vreg_64 = COPY $sgpr0_sgpr1
145    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
146    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
147    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %65, %subreg.sub1
148    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
149
150
151...
152
153# test for simple example, should generate sdwa
# A single V_AND_B32_e32 / V_ADD_CO_U32_e64 / V_ADDC_U32_e64 group. The
# carry-out %65 is read only by the V_ADDC_U32_e64, and the V_ADDC carry-out
# %66 is never read (although it is not flagged dead). The checks expect a
# V_ADD_CO_U32_sdwa followed by a V_ADDC_U32_e64.
154# GFX9-LABEL: name:            test5_add_co_sdwa
155# GFX9: V_ADD_CO_U32_sdwa
156# GFX9: V_ADDC_U32_e64
157---
158name:            test5_add_co_sdwa
159tracksRegLiveness: true
160registers:
161  - { id: 0, class: vgpr_32, preferred-register: '' }
162liveins:
163  - { reg: '$vgpr0', virtual-reg: '%0' }
164  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
165body:             |
166  bb.0:
167    liveins: $vgpr0, $sgpr0_sgpr1
168
169    %1:sgpr_64 = COPY $sgpr0_sgpr1
170    %0:vgpr_32 = COPY $vgpr0
171    %22:sreg_32_xm0 = S_MOV_B32 255
172    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
173    %30:vreg_64 = COPY $sgpr0_sgpr1
174    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
175    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
176    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
177    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
178
179
180...
181
182# test for V_ADD_CO_U32_e64 only, should reject
# The body has a V_ADD_CO_U32_e64 but no V_ADDC_U32_e64; nothing reads the
# carry-out %65. The checks expect the V_ADD_CO_U32_e64 to remain, with no
# V_ADD_CO_U32_sdwa and no V_ADDC_U32_e32 after it.
183# GFX9-LABEL: name:            test6_add_co_sdwa
184# GFX9: V_ADD_CO_U32_e64
185# GFX9-NOT: V_ADD_CO_U32_sdwa
186# GFX9-NOT: V_ADDC_U32_e32
187---
188name:            test6_add_co_sdwa
189tracksRegLiveness: true
190registers:
191  - { id: 0, class: vgpr_32, preferred-register: '' }
192liveins:
193  - { reg: '$vgpr0', virtual-reg: '%0' }
194  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
195body:             |
196  bb.0:
197    liveins: $vgpr0, $sgpr0_sgpr1
198
199    %1:sgpr_64 = COPY $sgpr0_sgpr1
200    %0:vgpr_32 = COPY $vgpr0
201    %22:sreg_32_xm0 = S_MOV_B32 255
202    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
203    %30:vreg_64 = COPY $sgpr0_sgpr1
204    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
205    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %23, %subreg.sub1
206    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
207
208
209...
210
211# test for V_ADDC_U32_e64 only, should reject
# The body has a V_ADDC_U32_e64 but no V_ADD_CO_U32_e64; its carry-in %24 is a
# plain COPY of $sgpr0_sgpr1. The checks expect the V_ADDC_U32_e64 to remain,
# with no V_ADD_CO_U32_sdwa and no V_ADDC_U32_e32 after it.
212# GFX9-LABEL: name:            test7_add_co_sdwa
213# GFX9: V_ADDC_U32_e64
214# GFX9-NOT: V_ADD_CO_U32_sdwa
215# GFX9-NOT: V_ADDC_U32_e32
216---
217name:            test7_add_co_sdwa
218tracksRegLiveness: true
219registers:
220  - { id: 0, class: vgpr_32, preferred-register: '' }
221liveins:
222  - { reg: '$vgpr0', virtual-reg: '%0' }
223  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
224body:             |
225  bb.0:
226    liveins: $vgpr0, $sgpr0_sgpr1
227
228    %1:sgpr_64 = COPY $sgpr0_sgpr1
229    %0:vgpr_32 = COPY $vgpr0
230    %22:sreg_32_xm0 = S_MOV_B32 255
231    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
232    %24:sreg_64_xexec = COPY $sgpr0_sgpr1
233
234    %30:vreg_64 = COPY $sgpr0_sgpr1
235    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %24, 0, implicit $exec
236    %62:vreg_64 = REG_SEQUENCE %23, %subreg.sub0, %23, %subreg.sub1
237    GLOBAL_STORE_DWORDX2_SADDR %30.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
238
239
240...
241
242# test for $vcc defined between two adds, should not generate
# $vcc is written by a COPY placed between the V_ADD_CO_U32_e64 and the
# V_ADDC_U32_e64, and is read again by a COPY after the V_ADDC_U32_e64. The
# checks expect no V_ADD_CO_U32_sdwa before the V_ADDC_U32_e64.
243# GFX9-LABEL: name:            test8_add_co_sdwa
244# GFX9-NOT: V_ADD_CO_U32_sdwa
245# GFX9: V_ADDC_U32_e64
246---
247name:            test8_add_co_sdwa
248tracksRegLiveness: true
249registers:
250  - { id: 0, class: vgpr_32, preferred-register: '' }
251liveins:
252  - { reg: '$vgpr0', virtual-reg: '%0' }
253  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
254body:             |
255  bb.0:
256    liveins: $vgpr0, $sgpr0_sgpr1
257
258    %1:sgpr_64 = COPY $sgpr0_sgpr1
259    %0:vgpr_32 = COPY $vgpr0
260    %22:sreg_32_xm0 = S_MOV_B32 255
261    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
262    %30:vreg_64 = COPY $sgpr0_sgpr1
263    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
264    $vcc = COPY %30
265    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
266    %31:vreg_64 = COPY $vcc
267    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
268    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
269
270
271...
272
273# test for non dead $vcc, should not generate
# $vcc is written by a COPY before the V_AND_B32_e32 / V_ADD_CO_U32_e64 /
# V_ADDC_U32_e64 sequence and read by a COPY after it, so its value is live
# across the whole sequence. The checks expect no V_ADD_CO_U32_sdwa before the
# V_ADDC_U32_e64.
274# GFX9-LABEL: name:            test9_add_co_sdwa
275# GFX9-NOT: V_ADD_CO_U32_sdwa
276# GFX9: V_ADDC_U32_e64
277---
278name:            test9_add_co_sdwa
279tracksRegLiveness: true
280registers:
281  - { id: 0, class: vgpr_32, preferred-register: '' }
282liveins:
283  - { reg: '$vgpr0', virtual-reg: '%0' }
284  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
285body:             |
286  bb.0:
287    liveins: $vgpr0, $sgpr0_sgpr1
288
289    %1:sgpr_64 = COPY $sgpr0_sgpr1
290    %0:vgpr_32 = COPY $vgpr0
291    %22:sreg_32_xm0 = S_MOV_B32 255
292    %30:vreg_64 = COPY $sgpr0_sgpr1
293    $vcc = COPY %30
294    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
295    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
296    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
297    %31:vreg_64 = COPY $vcc
298    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
299    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
300
301
302...
303
304# test for def $vcc_lo, should not generate
# $vcc_lo is written by a COPY before the V_AND_B32_e32 and read by a COPY
# placed between the V_ADD_CO_U32_e64 and the V_ADDC_U32_e64. The checks
# expect no V_ADD_CO_U32_sdwa before the V_ADDC_U32_e64.
305# GFX9-LABEL: name:            test10_add_co_sdwa
306# GFX9-NOT: V_ADD_CO_U32_sdwa
307# GFX9: V_ADDC_U32_e64
308---
309name:            test10_add_co_sdwa
310tracksRegLiveness: true
311registers:
312  - { id: 0, class: vgpr_32, preferred-register: '' }
313liveins:
314  - { reg: '$vgpr0', virtual-reg: '%0' }
315  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
316body:             |
317  bb.0:
318    liveins: $vgpr0, $sgpr0_sgpr1
319
320    %1:sgpr_64 = COPY $sgpr0_sgpr1
321    %0:vgpr_32 = COPY $vgpr0
322    %22:sreg_32_xm0 = S_MOV_B32 255
323    %30:vreg_64 = COPY $sgpr0_sgpr1
324    $vcc_lo = COPY %30.sub0
325    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
326    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
327    %31:vgpr_32 = COPY $vcc_lo
328    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
329    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
330    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
331    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
332
333...
334
335# test for read $vcc_hi, should not generate
# $vcc_hi is written by a COPY before the V_AND_B32_e32 and read by a COPY
# placed between the V_ADD_CO_U32_e64 and the V_ADDC_U32_e64. The checks
# expect no V_ADD_CO_U32_sdwa before the V_ADDC_U32_e64.
336# GFX9-LABEL: name:            test11_add_co_sdwa
337# GFX9-NOT: V_ADD_CO_U32_sdwa
338# GFX9: V_ADDC_U32_e64
339---
340name:            test11_add_co_sdwa
341tracksRegLiveness: true
342registers:
343  - { id: 0, class: vgpr_32, preferred-register: '' }
344liveins:
345  - { reg: '$vgpr0', virtual-reg: '%0' }
346  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
347body:             |
348  bb.0:
349    liveins: $vgpr0, $sgpr0_sgpr1
350
351    %1:sgpr_64 = COPY $sgpr0_sgpr1
352    %0:vgpr_32 = COPY $vgpr0
353    %22:sreg_32_xm0 = S_MOV_B32 255
354    %30:vreg_64 = COPY $sgpr0_sgpr1
355    $vcc_hi = COPY %30.sub0
356    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
357    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
358    %31:vgpr_32 = COPY $vcc_hi
359    %32:vreg_64 = REG_SEQUENCE %31, %subreg.sub0, %23, %subreg.sub1
360    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
361    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
362    GLOBAL_STORE_DWORDX2_SADDR %32.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
363
364...
365
366# test for $vcc defined and used between adds, should not generate
# Between the V_ADD_CO_U32_e64 and the V_ADDC_U32_e64, $vcc is written by one
# COPY and then read (killed) by the next COPY. The checks expect no
# V_ADD_CO_U32_sdwa before the V_ADDC_U32_e64.
367# GFX9-LABEL: name:            test12_add_co_sdwa
368# GFX9-NOT: V_ADD_CO_U32_sdwa
369# GFX9: V_ADDC_U32_e64
370---
371name:            test12_add_co_sdwa
372tracksRegLiveness: true
373registers:
374  - { id: 0, class: vgpr_32, preferred-register: '' }
375liveins:
376  - { reg: '$vgpr0', virtual-reg: '%0' }
377  - { reg: '$sgpr0_sgpr1', virtual-reg: '%1' }
378body:             |
379  bb.0:
380    liveins: $vgpr0, $sgpr0_sgpr1
381
382    %1:sgpr_64 = COPY $sgpr0_sgpr1
383    %0:vgpr_32 = COPY $vgpr0
384    %22:sreg_32_xm0 = S_MOV_B32 255
385    %30:vreg_64 = COPY $sgpr0_sgpr1
386    %23:vgpr_32 = V_AND_B32_e32 %22, %0, implicit $exec
387    %63:vgpr_32, %65:sreg_64_xexec = V_ADD_CO_U32_e64 %30.sub0, %23, 0, implicit $exec
388    $vcc = COPY %30
389    %31:vreg_64 = COPY killed $vcc
390    %64:vgpr_32, %66:sreg_64_xexec = V_ADDC_U32_e64 %30.sub1, %0, %65, 0, implicit $exec
391    %62:vreg_64 = REG_SEQUENCE %63, %subreg.sub0, %64, %subreg.sub1
392    GLOBAL_STORE_DWORDX2_SADDR %31.sub0, %62, %1, 0, 0, implicit $exec, implicit $exec :: (store (s64))
393