xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s
3
4# Test that we fold the correct element from G_UNMERGE_VALUES into the G_FMA
5
6---
# Input: G_FADD whose first operand is G_FMUL(%0, %1) and whose second operand
# is %el1, the second result of a G_UNMERGE_VALUES of a loaded <2 x s32>.
# Expected: one G_FMA of the two copies with %el1 (not %el0) as the addend.
7name: test_f32_add_mul
8body: |
9  bb.1:
10    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
11
12    ; GFX10-LABEL: name: test_f32_add_mul
13    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
14    ; GFX10-NEXT: {{  $}}
15    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
16    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
17    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
18    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
19    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
20    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
21    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
22    %0:_(s32) = COPY $vgpr0
23    %1:_(s32) = COPY $vgpr1
24    %ptr:_(p1) = COPY $vgpr2_vgpr3
25    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
26    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
27    %6:_(s32) = G_FMUL %0, %1
28    %7:_(s32) = G_FADD %6, %el1
29    $vgpr0 = COPY %7(s32)
30...
31
32---
# Same as test_f32_add_mul, but the G_FMUL is the second operand of the G_FADD
# and %el1 (second G_UNMERGE_VALUES result) is the first.
# Expected: the same single G_FMA with %el1 as the addend.
# The function mode sets both fp32 input and output denormals to false.
33name: test_f32_add_mul_rhs
34machineFunctionInfo:
35  mode:
36    fp32-input-denormals: false
37    fp32-output-denormals: false
38body: |
39  bb.1:
40    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
41
42    ; GFX10-LABEL: name: test_f32_add_mul_rhs
43    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
44    ; GFX10-NEXT: {{  $}}
45    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
46    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
47    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
48    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
49    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
50    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
51    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
52    %0:_(s32) = COPY $vgpr0
53    %1:_(s32) = COPY $vgpr1
54    %ptr:_(p1) = COPY $vgpr2_vgpr3
55    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
56    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
57    %6:_(s32) = G_FMUL %0, %1
58    %7:_(s32) = G_FADD %el1, %6
59    $vgpr0 = COPY %7(s32)
60...
61
62---
# Input: an s16 G_FMUL is extended to s32 with G_FPEXT and added to %el1 (the
# second G_UNMERGE_VALUES result); the G_FMUL and G_FADD carry fast-math flags.
# Expected: each s16 multiplicand is extended separately with G_FPEXT and the
# two extended values feed one s32 G_FMA with %el1 as the addend.
63name: test_f16_f32_add_ext_mul
64machineFunctionInfo:
65  mode:
66    fp32-input-denormals: false
67    fp32-output-denormals: false
68body: |
69  bb.1:
70    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
71
72    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul
73    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
74    ; GFX10-NEXT: {{  $}}
75    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
76    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
77    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
78    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
79    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
80    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
81    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
82    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
83    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
84    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
85    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
86    %0:_(s32) = COPY $sgpr0
87    %1:_(s16) = G_TRUNC %0(s32)
88    %2:_(s32) = COPY $sgpr1
89    %3:_(s16) = G_TRUNC %2(s32)
90    %ptr:_(p1) = COPY $vgpr0_vgpr1
91    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
92    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
93    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
94    %9:_(s32) = G_FPEXT %8(s16)
95    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
96    $vgpr0 = COPY %10(s32)
97...
98
99---
# Same as test_f16_f32_add_ext_mul, but %el1 (second G_UNMERGE_VALUES result)
# is the first operand of the G_FADD and the extended s16 G_FMUL is the second.
# Expected: the same pair of G_FPEXTs feeding one G_FMA with %el1 as the addend.
100name: test_f16_f32_add_ext_mul_rhs
101machineFunctionInfo:
102  mode:
103    fp32-input-denormals: false
104    fp32-output-denormals: false
105body: |
106  bb.1:
107    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
108
109    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
110    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
111    ; GFX10-NEXT: {{  $}}
112    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
113    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
114    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
115    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
116    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
117    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
118    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
119    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
120    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
121    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
122    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
123    %0:_(s32) = COPY $sgpr0
124    %1:_(s16) = G_TRUNC %0(s32)
125    %2:_(s32) = COPY $sgpr1
126    %3:_(s16) = G_TRUNC %2(s32)
127    %ptr:_(p1) = COPY $vgpr0_vgpr1
128    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
129    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
130    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
131    %9:_(s32) = G_FPEXT %8(s16)
132    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
133    $vgpr0 = COPY %10(s32)
134...
135
136---
# Input: G_FADD(G_FMA(%0, %1, G_FMUL(%2, %3)), %el1), all with fast-math flags,
# where %el1 is the second result of G_UNMERGE_VALUES.
# Expected: two chained G_FMAs. The inner one multiplies the $vgpr2/$vgpr3
# copies and adds %el1; the outer one multiplies the $vgpr0/$vgpr1 copies and
# adds the inner result.
137name: test_f32_add_fma_mul
138body: |
139  bb.1:
140    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
141
142    ; GFX10-LABEL: name: test_f32_add_fma_mul
143    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
144    ; GFX10-NEXT: {{  $}}
145    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
146    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
147    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
148    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
149    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
150    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
151    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
152    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
153    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
154    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
155    %0:_(s32) = COPY $vgpr0
156    %1:_(s32) = COPY $vgpr1
157    %2:_(s32) = COPY $vgpr2
158    %3:_(s32) = COPY $vgpr3
159    %ptr:_(p1) = COPY $vgpr4_vgpr5
160    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
161    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
162    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
163    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
164    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
165    $vgpr0 = COPY %10(s32)
166...
167
168---
# Same as test_f32_add_fma_mul, but %el1 (second G_UNMERGE_VALUES result) is
# the first operand of the final G_FADD and the G_FMA chain is the second.
# Expected: the same two chained G_FMAs, with %el1 as the inner addend.
169name: test_f32_add_fma_mul_rhs
170body: |
171  bb.1:
172    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
173
174    ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
175    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
176    ; GFX10-NEXT: {{  $}}
177    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
178    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
179    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
180    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
181    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
182    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
183    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
184    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
185    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
186    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
187    %0:_(s32) = COPY $vgpr0
188    %1:_(s32) = COPY $vgpr1
189    %2:_(s32) = COPY $vgpr2
190    %3:_(s32) = COPY $vgpr3
191    %ptr:_(p1) = COPY $vgpr4_vgpr5
192    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
193    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
194    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
195    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
196    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
197    $vgpr0 = COPY %10(s32)
198...
199
200---
# Input: G_FADD(G_FMA(%0, %1, G_FPEXT(s16 G_FMUL(%7, %9))), %el1), where %el1
# is the second result of G_UNMERGE_VALUES.
# Expected: the s16 multiplicands are extended individually with G_FPEXT and
# feed an inner G_FMA whose addend is %el1; the outer G_FMA multiplies the
# $vgpr0/$vgpr1 copies and adds the inner result.
201name: test_f16_f32_add_fma_ext_mul
202machineFunctionInfo:
203  mode:
204    fp32-input-denormals: false
205    fp32-output-denormals: false
206body: |
207  bb.1:
208    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
209
210    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
211    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
212    ; GFX10-NEXT: {{  $}}
213    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
214    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
215    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
216    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
217    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
218    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
219    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
220    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
221    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
222    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
223    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
224    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
225    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
226    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
227    %0:_(s32) = COPY $vgpr0
228    %1:_(s32) = COPY $vgpr1
229    %ptr:_(p1) = COPY $vgpr2_vgpr3
230    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
231    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
232    %6:_(s32) = COPY $vgpr4
233    %7:_(s16) = G_TRUNC %6(s32)
234    %8:_(s32) = COPY $vgpr5
235    %9:_(s16) = G_TRUNC %8(s32)
236    %10:_(s16) = G_FMUL %7, %9
237    %11:_(s32) = G_FPEXT %10(s16)
238    %12:_(s32) = G_FMA %0, %1, %11
239    %13:_(s32) = G_FADD %12, %el1
240    $vgpr0 = COPY %13(s32)
241...
242
243---
# Input: two s16 G_FMULs are summed with an s16 G_FADD, the sum is extended to
# s32 with G_FPEXT, and the result is added to %el1 (the second
# G_UNMERGE_VALUES result).
# Expected: all four s16 multiplicands are extended with G_FPEXT; the inner
# G_FMA multiplies the $vgpr4/$vgpr5 values and adds %el1, and the outer G_FMA
# multiplies the $vgpr0/$vgpr1 values and adds the inner result.
244name: test_f16_f32_add_ext_fma_mul
245machineFunctionInfo:
246  mode:
247    fp32-input-denormals: false
248    fp32-output-denormals: false
249body: |
250  bb.1:
251    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
252
253    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
254    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
255    ; GFX10-NEXT: {{  $}}
256    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
257    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
258    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
259    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
260    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
261    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
262    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
263    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
264    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
265    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
266    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
267    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
268    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
269    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
270    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
271    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
272    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
273    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
274    %0:_(s32) = COPY $vgpr0
275    %1:_(s16) = G_TRUNC %0(s32)
276    %2:_(s32) = COPY $vgpr1
277    %3:_(s16) = G_TRUNC %2(s32)
278    %ptr:_(p1) = COPY $vgpr2_vgpr3
279    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
280    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
281    %8:_(s32) = COPY $vgpr4
282    %9:_(s16) = G_TRUNC %8(s32)
283    %10:_(s32) = COPY $vgpr5
284    %11:_(s16) = G_TRUNC %10(s32)
285    %12:_(s16) = G_FMUL %9, %11
286    %13:_(s16) = G_FMUL %1, %3
287    %14:_(s16) = G_FADD %13, %12
288    %15:_(s32) = G_FPEXT %14(s16)
289    %16:_(s32) = G_FADD %15, %el1
290    $vgpr0 = COPY %16(s32)
291...
292
293---
# Same shape as test_f16_f32_add_fma_ext_mul, but %el1 (second
# G_UNMERGE_VALUES result) is the first operand of the final G_FADD and the
# pointer is taken from $vgpr0_vgpr1.
# Expected: an inner G_FMA of the two extended s16 values with %el1 as the
# addend, then an outer G_FMA of the $vgpr2/$vgpr3 copies adding that result.
294name: test_f16_f32_add_fma_ext_mul_rhs
295machineFunctionInfo:
296  mode:
297    fp32-input-denormals: false
298    fp32-output-denormals: false
299body: |
300  bb.1:
301    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
302
303    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
304    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
305    ; GFX10-NEXT: {{  $}}
306    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
307    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
308    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
309    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
310    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
311    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
312    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
313    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
314    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
315    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
316    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
317    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
318    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
319    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
320    %ptr:_(p1) = COPY $vgpr0_vgpr1
321    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
322    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
323    %4:_(s32) = COPY $vgpr2
324    %5:_(s32) = COPY $vgpr3
325    %6:_(s32) = COPY $vgpr4
326    %7:_(s16) = G_TRUNC %6(s32)
327    %8:_(s32) = COPY $vgpr5
328    %9:_(s16) = G_TRUNC %8(s32)
329    %10:_(s16) = G_FMUL %7, %9
330    %11:_(s32) = G_FPEXT %10(s16)
331    %12:_(s32) = G_FMA %4, %5, %11
332    %13:_(s32) = G_FADD %el1, %12
333    $vgpr0 = COPY %13(s32)
334...
335
336---
# Same shape as test_f16_f32_add_ext_fma_mul, but %el1 (second
# G_UNMERGE_VALUES result) is the first operand of the final G_FADD and the
# pointer is taken from $vgpr0_vgpr1.
# Expected: all four s16 multiplicands are extended with G_FPEXT; the inner
# G_FMA multiplies the $vgpr4/$vgpr5 values and adds %el1, and the outer G_FMA
# multiplies the $vgpr2/$vgpr3 values and adds the inner result.
337name: test_f16_f32_add_ext_fma_mul_rhs
338machineFunctionInfo:
339  mode:
340    fp32-input-denormals: false
341    fp32-output-denormals: false
342body: |
343  bb.1:
344    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
345
346    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
347    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
348    ; GFX10-NEXT: {{  $}}
349    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
350    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
351    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
352    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
353    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
354    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
355    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
356    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
357    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
358    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
359    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
360    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
361    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
362    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
363    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
364    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
365    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
366    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
367    %ptr:_(p1) = COPY $vgpr0_vgpr1
368    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
369    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
370    %4:_(s32) = COPY $vgpr2
371    %5:_(s16) = G_TRUNC %4(s32)
372    %6:_(s32) = COPY $vgpr3
373    %7:_(s16) = G_TRUNC %6(s32)
374    %8:_(s32) = COPY $vgpr4
375    %9:_(s16) = G_TRUNC %8(s32)
376    %10:_(s32) = COPY $vgpr5
377    %11:_(s16) = G_TRUNC %10(s32)
378    %12:_(s16) = G_FMUL %9, %11
379    %13:_(s16) = G_FMUL %5, %7
380    %14:_(s16) = G_FADD %13, %12
381    %15:_(s32) = G_FPEXT %14(s16)
382    %16:_(s32) = G_FADD %el1, %15
383    $vgpr0 = COPY %16(s32)
384...
385
386---
# Input: G_FSUB whose first operand is G_FMUL(%0, %1) and whose second operand
# is %el1, the second result of a G_UNMERGE_VALUES of a loaded <2 x s32>.
# Expected: %el1 is negated with G_FNEG and used as the addend of one G_FMA of
# the two copies.
# The pointer is read from $vgpr2_vgpr3 so that it matches the declared
# liveins and does not overlap the $vgpr0/$vgpr1 multiplicands.
387name: test_f32_sub_mul
388machineFunctionInfo:
389  mode:
390    fp32-input-denormals: false
391    fp32-output-denormals: false
392body: |
393  bb.1:
394    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
395
396    ; GFX10-LABEL: name: test_f32_sub_mul
397    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
398    ; GFX10-NEXT: {{  $}}
399    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
400    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
401    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
402    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
403    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
404    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
405    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
406    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
407    %0:_(s32) = COPY $vgpr0
408    %1:_(s32) = COPY $vgpr1
409    %ptr:_(p1) = COPY $vgpr2_vgpr3
410    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
411    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
412    %6:_(s32) = G_FMUL %0, %1
413    %7:_(s32) = G_FSUB %6, %el1
414    $vgpr0 = COPY %7(s32)
415...
416
417---
# Input: G_FSUB whose first operand is %el1 (second G_UNMERGE_VALUES result)
# and whose second operand is G_FMUL(%0, %1).
# Expected: the $vgpr0 copy is negated with G_FNEG and one G_FMA multiplies
# it by the $vgpr1 copy with %el1 as the addend.
418name: test_f32_sub_mul_rhs
419machineFunctionInfo:
420  mode:
421    fp32-input-denormals: false
422    fp32-output-denormals: false
423body: |
424  bb.1:
425    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
426
427    ; GFX10-LABEL: name: test_f32_sub_mul_rhs
428    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
429    ; GFX10-NEXT: {{  $}}
430    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
431    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
432    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
433    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
434    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
435    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
436    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
437    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
438    %0:_(s32) = COPY $vgpr0
439    %1:_(s32) = COPY $vgpr1
440    %ptr:_(p1) = COPY $vgpr2_vgpr3
441    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
442    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
443    %6:_(s32) = G_FMUL %0, %1
444    %7:_(s32) = G_FSUB %el1, %6
445    $vgpr0 = COPY %7(s32)
446...
447