xref: /llvm-project/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
3
4attributes #0 = { strictfp }
5
6declare float @llvm.fma.f32(float, float, float)
7declare double @llvm.fma.f64(double, double, double)
8declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
9declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
10
; llvm.fma of %b and lane 0 of the <4 x float> %v, accumulated onto %a.
; The expected output is a single scalar fmadd.
11define float @test_fmla_ss4S_0(float %a, float %b, <4 x float> %v) {
12; CHECK-LABEL: test_fmla_ss4S_0:
13; CHECK:       // %bb.0:
14; CHECK-NEXT:    fmadd s0, s1, s2, s0
15; CHECK-NEXT:    ret
16  %tmp1 = extractelement <4 x float> %v, i32 0
17  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
18  ret float %tmp2
19}
20
; llvm.fma with lane 0 of the <4 x float> %v as the first multiplicand and %b
; as the second, accumulated onto %a. The expected output is a scalar fmadd
; whose multiplicand registers appear in that same (lane, %b) order.
21define float @test_fmla_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
22; CHECK-LABEL: test_fmla_ss4S_0_swap:
23; CHECK:       // %bb.0:
24; CHECK-NEXT:    fmadd s0, s2, s1, s0
25; CHECK-NEXT:    ret
26  %tmp1 = extractelement <4 x float> %v, i32 0
27  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
28  ret float %tmp2
29}
30
; llvm.fma of %b and lane 3 of the <4 x float> %v, accumulated onto %a.
; The expected output is a by-element fmla that reads v2.s[3] directly.
31define float @test_fmla_ss4S_3(float %a, float %b, <4 x float> %v) {
32; CHECK-LABEL: test_fmla_ss4S_3:
33; CHECK:       // %bb.0:
34; CHECK-NEXT:    fmla s0, s1, v2.s[3]
35; CHECK-NEXT:    ret
36  %tmp1 = extractelement <4 x float> %v, i32 3
37  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
38  ret float %tmp2
39}
40
; llvm.fma with lane 3 of the <4 x float> %v as the first multiplicand.
; Note that %a is passed as both the second multiplicand and the addend, and
; %b is unused; the expected by-element fmla therefore uses s0 twice.
41define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
42; CHECK-LABEL: test_fmla_ss4S_3_swap:
43; CHECK:       // %bb.0:
44; CHECK-NEXT:    fmla s0, s0, v2.s[3]
45; CHECK-NEXT:    ret
46  %tmp1 = extractelement <4 x float> %v, i32 3
47  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %a, float %a)
48  ret float %tmp2
49}
50
; llvm.fma of %b and lane 0 of the <2 x float> %v, accumulated onto %a.
; The expected output is a scalar fmadd.
51define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
52; CHECK-LABEL: test_fmla_ss2S_0:
53; CHECK:       // %bb.0:
54; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
55; CHECK-NEXT:    fmadd s0, s1, s2, s0
56; CHECK-NEXT:    ret
57  %tmp1 = extractelement <2 x float> %v, i32 0
58  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
59  ret float %tmp2
60}
61
; llvm.fma with lane 0 of the <2 x float> %v as the first multiplicand and %b
; as the second, accumulated onto %a. The expected output is a scalar fmadd.
62define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
63; CHECK-LABEL: test_fmla_ss2S_0_swap:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
66; CHECK-NEXT:    fmadd s0, s2, s1, s0
67; CHECK-NEXT:    ret
68  %tmp1 = extractelement <2 x float> %v, i32 0
69  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a)
70  ret float %tmp2
71}
72
; llvm.fma of %b and lane 1 of the <2 x float> %v, accumulated onto %a.
; The expected output is a by-element fmla that reads v2.s[1].
73define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
74; CHECK-LABEL: test_fmla_ss2S_1:
75; CHECK:       // %bb.0:
76; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
77; CHECK-NEXT:    fmla s0, s1, v2.s[1]
78; CHECK-NEXT:    ret
79  %tmp1 = extractelement <2 x float> %v, i32 1
80  %tmp2 = call float @llvm.fma.f32(float %b, float %tmp1, float %a)
81  ret float %tmp2
82}
83
; Both multiplicands are extracted from the same <4 x float> %v: lane 0 first,
; lane 3 second. The expected output is a by-element fmla using s1 (lane 0)
; and v1.s[3].
84define float @test_fmla_ss4S_3_ext0(float %a, <4 x float> %v) {
85; CHECK-LABEL: test_fmla_ss4S_3_ext0:
86; CHECK:       // %bb.0:
87; CHECK-NEXT:    fmla s0, s1, v1.s[3]
88; CHECK-NEXT:    ret
89  %tmp0 = extractelement <4 x float> %v, i32 0
90  %tmp1 = extractelement <4 x float> %v, i32 3
91  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
92  ret float %tmp2
93}
94
; Both multiplicands are extracted from the same <4 x float> %v, passed to
; llvm.fma as (lane 3, lane 0). The expected output is still a by-element fmla
; using s1 and v1.s[3].
95define float @test_fmla_ss4S_3_ext0_swp(float %a, <4 x float> %v) {
96; CHECK-LABEL: test_fmla_ss4S_3_ext0_swp:
97; CHECK:       // %bb.0:
98; CHECK-NEXT:    fmla s0, s1, v1.s[3]
99; CHECK-NEXT:    ret
100  %tmp0 = extractelement <4 x float> %v, i32 0
101  %tmp1 = extractelement <4 x float> %v, i32 3
102  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
103  ret float %tmp2
104}
105
; Multiplicands are lane 0 of two different <4 x float> vectors (%v and %w).
; The expected output is a scalar fmadd.
106define float @test_fmla_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
107; CHECK-LABEL: test_fmla_ss4S_0_ext0:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    fmadd s0, s1, s2, s0
110; CHECK-NEXT:    ret
111  %tmp0 = extractelement <4 x float> %v, i32 0
112  %tmp1 = extractelement <4 x float> %w, i32 0
113  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
114  ret float %tmp2
115}
116
; Both multiplicands are extracted from the same <2 x float> %v: lane 0 first,
; lane 1 second (despite the "_3" in the name, the extracted index is 1).
; The expected output is a by-element fmla using s1 and v1.s[1].
117define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
118; CHECK-LABEL: test_fmla_ss2S_3_ext0:
119; CHECK:       // %bb.0:
120; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
121; CHECK-NEXT:    fmla s0, s1, v1.s[1]
122; CHECK-NEXT:    ret
123  %tmp0 = extractelement <2 x float> %v, i32 0
124  %tmp1 = extractelement <2 x float> %v, i32 1
125  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
126  ret float %tmp2
127}
128
; Both multiplicands are extracted from the same <2 x float> %v, passed to
; llvm.fma as (lane 1, lane 0). The expected output is a by-element fmla using
; s1 and v1.s[1].
129define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
130; CHECK-LABEL: test_fmla_ss2S_3_ext0_swp:
131; CHECK:       // %bb.0:
132; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
133; CHECK-NEXT:    fmla s0, s1, v1.s[1]
134; CHECK-NEXT:    ret
135  %tmp0 = extractelement <2 x float> %v, i32 0
136  %tmp1 = extractelement <2 x float> %v, i32 1
137  %tmp2 = call float @llvm.fma.f32(float %tmp1, float %tmp0, float %a)
138  ret float %tmp2
139}
140
; Multiplicands are lane 0 of two different <2 x float> vectors (%v and %w).
; The expected output is a scalar fmadd.
141define float @test_fmla_ss2S_0_ext0(float %a, <2 x float> %v, <2 x float> %w) {
142; CHECK-LABEL: test_fmla_ss2S_0_ext0:
143; CHECK:       // %bb.0:
144; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
145; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
146; CHECK-NEXT:    fmadd s0, s1, s2, s0
147; CHECK-NEXT:    ret
148  %tmp0 = extractelement <2 x float> %v, i32 0
149  %tmp1 = extractelement <2 x float> %w, i32 0
150  %tmp2 = call float @llvm.fma.f32(float %tmp0, float %tmp1, float %a)
151  ret float %tmp2
152}
153
; llvm.fma of %b and the only element of the <1 x double> %v, accumulated
; onto %a. The expected output is a scalar fmadd on d registers.
154define double @test_fmla_ddD_0(double %a, double %b, <1 x double> %v) {
155; CHECK-LABEL: test_fmla_ddD_0:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    fmadd d0, d1, d2, d0
158; CHECK-NEXT:    ret
159  %tmp1 = extractelement <1 x double> %v, i32 0
160  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
161  ret double %tmp2
162}
163
; llvm.fma with the only element of the <1 x double> %v as the first
; multiplicand and %b as the second, accumulated onto %a. The expected output
; is a scalar fmadd with the multiplicand registers in that order.
164define double @test_fmla_ddD_0_swap(double %a, double %b, <1 x double> %v) {
165; CHECK-LABEL: test_fmla_ddD_0_swap:
166; CHECK:       // %bb.0:
167; CHECK-NEXT:    fmadd d0, d2, d1, d0
168; CHECK-NEXT:    ret
169  %tmp1 = extractelement <1 x double> %v, i32 0
170  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
171  ret double %tmp2
172}
173
; llvm.fma of %b and lane 0 of the <2 x double> %v, accumulated onto %a.
; The expected output is a scalar fmadd.
174define double @test_fmla_dd2D_0(double %a, double %b, <2 x double> %v) {
175; CHECK-LABEL: test_fmla_dd2D_0:
176; CHECK:       // %bb.0:
177; CHECK-NEXT:    fmadd d0, d1, d2, d0
178; CHECK-NEXT:    ret
179  %tmp1 = extractelement <2 x double> %v, i32 0
180  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
181  ret double %tmp2
182}
183
; llvm.fma with lane 0 of the <2 x double> %v as the first multiplicand and %b
; as the second, accumulated onto %a. The expected output is a scalar fmadd.
184define double @test_fmla_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
185; CHECK-LABEL: test_fmla_dd2D_0_swap:
186; CHECK:       // %bb.0:
187; CHECK-NEXT:    fmadd d0, d2, d1, d0
188; CHECK-NEXT:    ret
189  %tmp1 = extractelement <2 x double> %v, i32 0
190  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
191  ret double %tmp2
192}
193
; llvm.fma of %b and lane 1 of the <2 x double> %v, accumulated onto %a.
; The expected output is a by-element fmla that reads v2.d[1].
194define double @test_fmla_dd2D_1(double %a, double %b, <2 x double> %v) {
195; CHECK-LABEL: test_fmla_dd2D_1:
196; CHECK:       // %bb.0:
197; CHECK-NEXT:    fmla d0, d1, v2.d[1]
198; CHECK-NEXT:    ret
199  %tmp1 = extractelement <2 x double> %v, i32 1
200  %tmp2 = call double @llvm.fma.f64(double %b, double %tmp1, double %a)
201  ret double %tmp2
202}
203
; llvm.fma with lane 1 of the <2 x double> %v as the first multiplicand and %b
; as the second. The expected output is the same by-element fmla as when the
; lane is the second multiplicand: d1 with v2.d[1].
204define double @test_fmla_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
205; CHECK-LABEL: test_fmla_dd2D_1_swap:
206; CHECK:       // %bb.0:
207; CHECK-NEXT:    fmla d0, d1, v2.d[1]
208; CHECK-NEXT:    ret
209  %tmp1 = extractelement <2 x double> %v, i32 1
210  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %b, double %a)
211  ret double %tmp2
212}
213
; Both multiplicands are extracted from the same <2 x double> %v: lane 0
; first, lane 1 second. The expected output is a by-element fmla using d1
; and v1.d[1].
214define double @test_fmla_ss2D_1_ext0(double %a, <2 x double> %v) {
215; CHECK-LABEL: test_fmla_ss2D_1_ext0:
216; CHECK:       // %bb.0:
217; CHECK-NEXT:    fmla d0, d1, v1.d[1]
218; CHECK-NEXT:    ret
219  %tmp0 = extractelement <2 x double> %v, i32 0
220  %tmp1 = extractelement <2 x double> %v, i32 1
221  %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
222  ret double %tmp2
223}
224
; Both multiplicands are extracted from the same <2 x double> %v, passed to
; llvm.fma as (lane 1, lane 0). The expected output is a by-element fmla using
; d1 and v1.d[1].
225define double @test_fmla_ss2D_1_ext0_swp(double %a, <2 x double> %v) {
226; CHECK-LABEL: test_fmla_ss2D_1_ext0_swp:
227; CHECK:       // %bb.0:
228; CHECK-NEXT:    fmla d0, d1, v1.d[1]
229; CHECK-NEXT:    ret
230  %tmp0 = extractelement <2 x double> %v, i32 0
231  %tmp1 = extractelement <2 x double> %v, i32 1
232  %tmp2 = call double @llvm.fma.f64(double %tmp1, double %tmp0, double %a)
233  ret double %tmp2
234}
235
; Multiplicands are lane 0 of two different <2 x double> vectors (%v and %w).
; The expected output is a scalar fmadd.
236define double @test_fmla_ss2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
237; CHECK-LABEL: test_fmla_ss2D_0_ext0:
238; CHECK:       // %bb.0:
239; CHECK-NEXT:    fmadd d0, d1, d2, d0
240; CHECK-NEXT:    ret
241  %tmp0 = extractelement <2 x double> %v, i32 0
242  %tmp1 = extractelement <2 x double> %w, i32 0
243  %tmp2 = call double @llvm.fma.f64(double %tmp0, double %tmp1, double %a)
244  ret double %tmp2
245}
246
; llvm.fma of (fneg %b) and lane 0 of the <4 x float> %v, accumulated onto %a.
; The expected output is a scalar fmsub, with no separate negate instruction.
247define float @test_fmls_ss4S_0(float %a, float %b, <4 x float> %v) {
248; CHECK-LABEL: test_fmls_ss4S_0:
249; CHECK:       // %bb.0: // %entry
250; CHECK-NEXT:    fmsub s0, s2, s1, s0
251; CHECK-NEXT:    ret
252entry:
253  %fneg = fneg float %b
254  %extract = extractelement <4 x float> %v, i64 0
255  %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
256  ret float %0
257}
258
; llvm.fma with lane 0 of the <4 x float> %v as the first multiplicand and
; (fneg %b) as the second, accumulated onto %a. The expected output is a
; scalar fmsub.
259define float @test_fmls_ss4S_0_swap(float %a, float %b, <4 x float> %v) {
260; CHECK-LABEL: test_fmls_ss4S_0_swap:
261; CHECK:       // %bb.0: // %entry
262; CHECK-NEXT:    fmsub s0, s2, s1, s0
263; CHECK-NEXT:    ret
264entry:
265  %fneg = fneg float %b
266  %extract = extractelement <4 x float> %v, i64 0
267  %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
268  ret float %0
269}
270
; Both multiplicands derive from lane 3 of the <4 x float> %v: the first is its
; negation (written as fsub -0.0, x), the second is the lane itself; %b is
; unused. The expected output copies the lane into s1 and then issues a
; by-element fmls against v2.s[3].
271define float @test_fmls_ss4S_3(float %a, float %b, <4 x float> %v) {
272; CHECK-LABEL: test_fmls_ss4S_3:
273; CHECK:       // %bb.0:
274; CHECK-NEXT:    mov s1, v2.s[3]
275; CHECK-NEXT:    fmls s0, s1, v2.s[3]
276; CHECK-NEXT:    ret
277  %tmp1 = extractelement <4 x float> %v, i32 3
278  %tmp2 = fsub float -0.0, %tmp1
279  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
280  ret float %tmp3
281}
282
; Both multiplicands derive from lane 3 of the <4 x float> %v: the lane itself
; first, its negation (fsub -0.0, x) second; %b is unused. The expected output
; copies the lane into s1 and then issues a by-element fmls against v2.s[3].
283define float @test_fmls_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
284; CHECK-LABEL: test_fmls_ss4S_3_swap:
285; CHECK:       // %bb.0:
286; CHECK-NEXT:    mov s1, v2.s[3]
287; CHECK-NEXT:    fmls s0, s1, v2.s[3]
288; CHECK-NEXT:    ret
289  %tmp1 = extractelement <4 x float> %v, i32 3
290  %tmp2 = fsub float -0.0, %tmp1
291  %tmp3 = call float @llvm.fma.f32(float %tmp1, float %tmp2, float %a)
292  ret float %tmp3
293}
294
; llvm.fma of (fneg %b) and lane 0 of the <2 x float> %v, accumulated onto %a.
; The expected output is a scalar fmsub.
295define float @test_fmls_ss2S_0(float %a, float %b, <2 x float> %v) {
296; CHECK-LABEL: test_fmls_ss2S_0:
297; CHECK:       // %bb.0: // %entry
298; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
299; CHECK-NEXT:    fmsub s0, s2, s1, s0
300; CHECK-NEXT:    ret
301entry:
302  %fneg = fneg float %b
303  %extract = extractelement <2 x float> %v, i64 0
304  %0 = tail call float @llvm.fma.f32(float %fneg, float %extract, float %a)
305  ret float %0
306}
307
; llvm.fma with lane 0 of the <2 x float> %v as the first multiplicand and
; (fneg %b) as the second, accumulated onto %a. The expected output is a
; scalar fmsub.
308define float @test_fmls_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
309; CHECK-LABEL: test_fmls_ss2S_0_swap:
310; CHECK:       // %bb.0: // %entry
311; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
312; CHECK-NEXT:    fmsub s0, s2, s1, s0
313; CHECK-NEXT:    ret
314entry:
315  %fneg = fneg float %b
316  %extract = extractelement <2 x float> %v, i64 0
317  %0 = tail call float @llvm.fma.f32(float %extract, float %fneg, float %a)
318  ret float %0
319}
320
; Both multiplicands derive from lane 1 of the <2 x float> %v: its negation
; (fsub -0.0, x) first, the lane itself second; %b is unused. The expected
; output copies the lane into s1 and then issues a by-element fmls against
; v2.s[1].
321define float @test_fmls_ss2S_1(float %a, float %b, <2 x float> %v) {
322; CHECK-LABEL: test_fmls_ss2S_1:
323; CHECK:       // %bb.0:
324; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
325; CHECK-NEXT:    mov s1, v2.s[1]
326; CHECK-NEXT:    fmls s0, s1, v2.s[1]
327; CHECK-NEXT:    ret
328  %tmp1 = extractelement <2 x float> %v, i32 1
329  %tmp2 = fsub float -0.0, %tmp1
330  %tmp3 = call float @llvm.fma.f32(float %tmp2, float %tmp1, float %a)
331  ret float %tmp3
332}
333
; Multiplicands are lane 0 of the <4 x float> %v and the negation
; (fsub -0.0, x) of lane 3 of the same vector. The expected output is a single
; by-element fmls using s1 and v1.s[3].
334define float @test_fmls_ss4S_3_ext0(float %a, <4 x float> %v) {
335; CHECK-LABEL: test_fmls_ss4S_3_ext0:
336; CHECK:       // %bb.0:
337; CHECK-NEXT:    fmls s0, s1, v1.s[3]
338; CHECK-NEXT:    ret
339  %tmp0 = extractelement <4 x float> %v, i32 0
340  %tmp1 = extractelement <4 x float> %v, i32 3
341  %tmp2 = fsub float -0.0, %tmp1
342  %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
343  ret float %tmp3
344}
345
; Multiplicands are lane 0 of %v and the negation (fsub -0.0, x) of lane 0 of
; %w, both <4 x float>. The expected output is a scalar fmsub.
346define float @test_fmls_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
347; CHECK-LABEL: test_fmls_ss4S_0_ext0:
348; CHECK:       // %bb.0:
349; CHECK-NEXT:    fmsub s0, s1, s2, s0
350; CHECK-NEXT:    ret
351  %tmp0 = extractelement <4 x float> %v, i32 0
352  %tmp1 = extractelement <4 x float> %w, i32 0
353  %tmp2 = fsub float -0.0, %tmp1
354  %tmp3 = call float @llvm.fma.f32(float %tmp0, float %tmp2, float %a)
355  ret float %tmp3
356}
357
; llvm.fma of (fneg %b) and the only element of the <1 x double> %v,
; accumulated onto %a. The expected output is a scalar fmsub on d registers.
358define double @test_fmls_ddD_0(double %a, double %b, <1 x double> %v) {
359; CHECK-LABEL: test_fmls_ddD_0:
360; CHECK:       // %bb.0: // %entry
361; CHECK-NEXT:    fmsub d0, d1, d2, d0
362; CHECK-NEXT:    ret
363entry:
364  %fneg = fneg double %b
365  %extract = extractelement <1 x double> %v, i64 0
366  %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
367  ret double %0
368}
369
; llvm.fma with the only element of the <1 x double> %v as the first
; multiplicand and (fneg %b) as the second, accumulated onto %a. The expected
; output is a scalar fmsub.
370define double @test_fmls_ddD_0_swap(double %a, double %b, <1 x double> %v) {
371; CHECK-LABEL: test_fmls_ddD_0_swap:
372; CHECK:       // %bb.0: // %entry
373; CHECK-NEXT:    fmsub d0, d2, d1, d0
374; CHECK-NEXT:    ret
375entry:
376  %fneg = fneg double %b
377  %extract = extractelement <1 x double> %v, i64 0
378  %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
379  ret double %0
380}
381
; llvm.fma of (fneg %b) and lane 0 of the <2 x double> %v, accumulated onto
; %a. The expected output is a scalar fmsub.
382define double @test_fmls_dd2D_0(double %a, double %b, <2 x double> %v) {
383; CHECK-LABEL: test_fmls_dd2D_0:
384; CHECK:       // %bb.0: // %entry
385; CHECK-NEXT:    fmsub d0, d2, d1, d0
386; CHECK-NEXT:    ret
387entry:
388  %fneg = fneg double %b
389  %extract = extractelement <2 x double> %v, i64 0
390  %0 = tail call double @llvm.fma.f64(double %fneg, double %extract, double %a)
391  ret double %0
392}
393
; llvm.fma with lane 0 of the <2 x double> %v as the first multiplicand and
; (fneg %b) as the second, accumulated onto %a. The expected output is a
; scalar fmsub.
394define double @test_fmls_dd2D_0_swap(double %a, double %b, <2 x double> %v) {
395; CHECK-LABEL: test_fmls_dd2D_0_swap:
396; CHECK:       // %bb.0: // %entry
397; CHECK-NEXT:    fmsub d0, d2, d1, d0
398; CHECK-NEXT:    ret
399entry:
400  %fneg = fneg double %b
401  %extract = extractelement <2 x double> %v, i64 0
402  %0 = tail call double @llvm.fma.f64(double %extract, double %fneg, double %a)
403  ret double %0
404}
405
; Both multiplicands derive from lane 1 of the <2 x double> %v: its negation
; (fsub -0.0, x) first, the lane itself second; %b is unused. The expected
; output copies the lane into d1 and then issues a by-element fmls against
; v2.d[1].
406define double @test_fmls_dd2D_1(double %a, double %b, <2 x double> %v) {
407; CHECK-LABEL: test_fmls_dd2D_1:
408; CHECK:       // %bb.0:
409; CHECK-NEXT:    mov d1, v2.d[1]
410; CHECK-NEXT:    fmls d0, d1, v2.d[1]
411; CHECK-NEXT:    ret
412  %tmp1 = extractelement <2 x double> %v, i32 1
413  %tmp2 = fsub double -0.0, %tmp1
414  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp1, double %a)
415  ret double %tmp3
416}
417
; Both multiplicands derive from lane 1 of the <2 x double> %v: the lane
; itself first, its negation (fsub -0.0, x) second; %b is unused. The expected
; output copies the lane into d1 and then issues a by-element fmls against
; v2.d[1].
418define double @test_fmls_dd2D_1_swap(double %a, double %b, <2 x double> %v) {
419; CHECK-LABEL: test_fmls_dd2D_1_swap:
420; CHECK:       // %bb.0:
421; CHECK-NEXT:    mov d1, v2.d[1]
422; CHECK-NEXT:    fmls d0, d1, v2.d[1]
423; CHECK-NEXT:    ret
424  %tmp1 = extractelement <2 x double> %v, i32 1
425  %tmp2 = fsub double -0.0, %tmp1
426  %tmp3 = call double @llvm.fma.f64(double %tmp1, double %tmp2, double %a)
427  ret double %tmp3
428}
429
; Multiplicands are the negation (fsub -0.0, x) of lane 1 and lane 0 of the
; same <2 x double> %v. The expected output is a single by-element fmls using
; d1 and v1.d[1].
430define double @test_fmls_dd2D_1_ext0(double %a, <2 x double> %v) {
431; CHECK-LABEL: test_fmls_dd2D_1_ext0:
432; CHECK:       // %bb.0:
433; CHECK-NEXT:    fmls d0, d1, v1.d[1]
434; CHECK-NEXT:    ret
435  %tmp0 = extractelement <2 x double> %v, i32 0
436  %tmp1 = extractelement <2 x double> %v, i32 1
437  %tmp2 = fsub double -0.0, %tmp1
438  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
439  ret double %tmp3
440}
441
; Multiplicands are the negation (fsub -0.0, x) of lane 0 of %w and lane 0 of
; %v, both <2 x double>. The expected output is a scalar fmsub.
442define double @test_fmls_dd2D_0_ext0(double %a, <2 x double> %v, <2 x double> %w) {
443; CHECK-LABEL: test_fmls_dd2D_0_ext0:
444; CHECK:       // %bb.0:
445; CHECK-NEXT:    fmsub d0, d1, d2, d0
446; CHECK-NEXT:    ret
447  %tmp0 = extractelement <2 x double> %v, i32 0
448  %tmp1 = extractelement <2 x double> %w, i32 0
449  %tmp2 = fsub double -0.0, %tmp1
450  %tmp3 = call double @llvm.fma.f64(double %tmp2, double %tmp0, double %a)
451  ret double %tmp3
452}
453
; Strict-FP variant (attribute group #0, constrained fma with round.tonearest
; and fpexcept.strict): %b times lane 0 of the <4 x float> %v, plus %a.
; The expected output is a scalar fmadd.
454define float @test_fmla_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
455; CHECK-LABEL: test_fmla_ss4S_0_strict:
456; CHECK:       // %bb.0:
457; CHECK-NEXT:    fmadd s0, s1, s2, s0
458; CHECK-NEXT:    ret
459  %tmp1 = extractelement <4 x float> %v, i32 0
460  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
461  ret float %tmp2
462}
463
; Strict-FP variant (constrained fma): lane 0 of the <4 x float> %v is the
; first multiplicand and %b the second, accumulated onto %a. The expected
; output is a scalar fmadd with the multiplicand registers in that order.
464define float @test_fmla_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
465; CHECK-LABEL: test_fmla_ss4S_0_swap_strict:
466; CHECK:       // %bb.0:
467; CHECK-NEXT:    fmadd s0, s2, s1, s0
468; CHECK-NEXT:    ret
469  %tmp1 = extractelement <4 x float> %v, i32 0
470  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
471  ret float %tmp2
472}
473
; Strict-FP variant (constrained fma): %b times lane 3 of the <4 x float> %v,
; plus %a. The expected output is a by-element fmla that reads v2.s[3].
474define float @test_fmla_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
475; CHECK-LABEL: test_fmla_ss4S_3_strict:
476; CHECK:       // %bb.0:
477; CHECK-NEXT:    fmla s0, s1, v2.s[3]
478; CHECK-NEXT:    ret
479  %tmp1 = extractelement <4 x float> %v, i32 3
480  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
481  ret float %tmp2
482}
483
; Strict-FP variant (constrained fma) with lane 3 of the <4 x float> %v as the
; first multiplicand. Note that %a is passed as both the second multiplicand
; and the addend, and %b is unused; the expected fmla uses s0 twice.
484define float @test_fmla_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
485; CHECK-LABEL: test_fmla_ss4S_3_swap_strict:
486; CHECK:       // %bb.0:
487; CHECK-NEXT:    fmla s0, s0, v2.s[3]
488; CHECK-NEXT:    ret
489  %tmp1 = extractelement <4 x float> %v, i32 3
490  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
491  ret float %tmp2
492}
493
; Strict-FP variant (constrained fma): %b times lane 0 of the <2 x float> %v,
; plus %a. The expected output is a scalar fmadd.
494define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
495; CHECK-LABEL: test_fmla_ss2S_0_strict:
496; CHECK:       // %bb.0:
497; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
498; CHECK-NEXT:    fmadd s0, s1, s2, s0
499; CHECK-NEXT:    ret
500  %tmp1 = extractelement <2 x float> %v, i32 0
501  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
502  ret float %tmp2
503}
504
; Strict-FP variant (constrained fma): lane 0 of the <2 x float> %v is the
; first multiplicand and %b the second, accumulated onto %a. The expected
; output is a scalar fmadd.
505define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
506; CHECK-LABEL: test_fmla_ss2S_0_swap_strict:
507; CHECK:       // %bb.0:
508; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
509; CHECK-NEXT:    fmadd s0, s2, s1, s0
510; CHECK-NEXT:    ret
511  %tmp1 = extractelement <2 x float> %v, i32 0
512  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %b, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
513  ret float %tmp2
514}
515
; Strict-FP variant (constrained fma): %b times lane 1 of the <2 x float> %v,
; plus %a. The expected output is a by-element fmla that reads v2.s[1].
516define float @test_fmla_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
517; CHECK-LABEL: test_fmla_ss2S_1_strict:
518; CHECK:       // %bb.0:
519; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
520; CHECK-NEXT:    fmla s0, s1, v2.s[1]
521; CHECK-NEXT:    ret
522  %tmp1 = extractelement <2 x float> %v, i32 1
523  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
524  ret float %tmp2
525}
526
; Strict-FP variant (constrained fma): %b times the only element of the
; <1 x double> %v, plus %a. The expected output is a scalar fmadd.
527define double @test_fmla_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
528; CHECK-LABEL: test_fmla_ddD_0_strict:
529; CHECK:       // %bb.0:
530; CHECK-NEXT:    fmadd d0, d1, d2, d0
531; CHECK-NEXT:    ret
532  %tmp1 = extractelement <1 x double> %v, i32 0
533  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
534  ret double %tmp2
535}
536
; Strict-FP variant (constrained fma): the only element of the <1 x double> %v
; is the first multiplicand and %b the second, accumulated onto %a. The
; expected output is a scalar fmadd with the registers in that order.
537define double @test_fmla_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
538; CHECK-LABEL: test_fmla_ddD_0_swap_strict:
539; CHECK:       // %bb.0:
540; CHECK-NEXT:    fmadd d0, d2, d1, d0
541; CHECK-NEXT:    ret
542  %tmp1 = extractelement <1 x double> %v, i32 0
543  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
544  ret double %tmp2
545}
546
; Strict-FP variant (constrained fma): %b times lane 0 of the <2 x double> %v,
; plus %a. The expected output is a scalar fmadd.
547define double @test_fmla_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
548; CHECK-LABEL: test_fmla_dd2D_0_strict:
549; CHECK:       // %bb.0:
550; CHECK-NEXT:    fmadd d0, d1, d2, d0
551; CHECK-NEXT:    ret
552  %tmp1 = extractelement <2 x double> %v, i32 0
553  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
554  ret double %tmp2
555}
556
; Strict-FP variant (constrained fma): lane 0 of the <2 x double> %v is the
; first multiplicand and %b the second, accumulated onto %a. The expected
; output is a scalar fmadd.
557define double @test_fmla_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
558; CHECK-LABEL: test_fmla_dd2D_0_swap_strict:
559; CHECK:       // %bb.0:
560; CHECK-NEXT:    fmadd d0, d2, d1, d0
561; CHECK-NEXT:    ret
562  %tmp1 = extractelement <2 x double> %v, i32 0
563  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
564  ret double %tmp2
565}
566
; Strict-FP variant (constrained fma): %b times lane 1 of the <2 x double> %v,
; plus %a. The expected output is a by-element fmla that reads v2.d[1].
567define double @test_fmla_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
568; CHECK-LABEL: test_fmla_dd2D_1_strict:
569; CHECK:       // %bb.0:
570; CHECK-NEXT:    fmla d0, d1, v2.d[1]
571; CHECK-NEXT:    ret
572  %tmp1 = extractelement <2 x double> %v, i32 1
573  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
574  ret double %tmp2
575}
576
; Strict-FP variant (constrained fma): lane 1 of the <2 x double> %v is the
; first multiplicand and %b the second. The expected output is a by-element
; fmla using d1 and v2.d[1].
577define double @test_fmla_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
578; CHECK-LABEL: test_fmla_dd2D_1_swap_strict:
579; CHECK:       // %bb.0:
580; CHECK-NEXT:    fmla d0, d1, v2.d[1]
581; CHECK-NEXT:    ret
582  %tmp1 = extractelement <2 x double> %v, i32 1
583  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
584  ret double %tmp2
585}
586
; Strict-FP variant (constrained fma): (fneg %b) times lane 0 of the
; <4 x float> %v, plus %a. The expected output is a scalar fmsub.
587define float @test_fmls_ss4S_0_strict(float %a, float %b, <4 x float> %v) #0 {
588; CHECK-LABEL: test_fmls_ss4S_0_strict:
589; CHECK:       // %bb.0: // %entry
590; CHECK-NEXT:    fmsub s0, s2, s1, s0
591; CHECK-NEXT:    ret
592entry:
593  %fneg = fneg float %b
594  %extract = extractelement <4 x float> %v, i64 0
595  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
596  ret float %0
597}
598
; Strict-FP variant (constrained fma): lane 0 of the <4 x float> %v is the
; first multiplicand and (fneg %b) the second, accumulated onto %a. The
; expected output is a scalar fmsub.
599define float @test_fmls_ss4S_0_swap_strict(float %a, float %b, <4 x float> %v) #0 {
600; CHECK-LABEL: test_fmls_ss4S_0_swap_strict:
601; CHECK:       // %bb.0: // %entry
602; CHECK-NEXT:    fmsub s0, s2, s1, s0
603; CHECK-NEXT:    ret
604entry:
605  %fneg = fneg float %b
606  %extract = extractelement <4 x float> %v, i64 0
607  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
608  ret float %0
609}
610
; Strict-FP variant (constrained fma): both multiplicands derive from lane 3
; of the <4 x float> %v, its fneg first and the lane itself second; %b is
; unused. The expected output copies the lane into s1 and then issues a
; by-element fmls against v2.s[3].
611define float @test_fmls_ss4S_3_strict(float %a, float %b, <4 x float> %v) #0 {
612; CHECK-LABEL: test_fmls_ss4S_3_strict:
613; CHECK:       // %bb.0:
614; CHECK-NEXT:    mov s1, v2.s[3]
615; CHECK-NEXT:    fmls s0, s1, v2.s[3]
616; CHECK-NEXT:    ret
617  %tmp1 = extractelement <4 x float> %v, i32 3
618  %tmp2 = fneg float %tmp1
619  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
620  ret float %tmp3
621}
622
; Strict-FP variant (constrained fma): both multiplicands derive from lane 3
; of the <4 x float> %v, the lane itself first and its fneg second; %b is
; unused. The expected output copies the lane into s1 and then issues a
; by-element fmls against v2.s[3].
623define float @test_fmls_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #0 {
624; CHECK-LABEL: test_fmls_ss4S_3_swap_strict:
625; CHECK:       // %bb.0:
626; CHECK-NEXT:    mov s1, v2.s[3]
627; CHECK-NEXT:    fmls s0, s1, v2.s[3]
628; CHECK-NEXT:    ret
629  %tmp1 = extractelement <4 x float> %v, i32 3
630  %tmp2 = fneg float %tmp1
631  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %tmp2, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
632  ret float %tmp3
633}
634
; Strict-FP variant (constrained fma): (fneg %b) times lane 0 of the
; <2 x float> %v, plus %a. The expected output is a scalar fmsub.
635define float @test_fmls_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
636; CHECK-LABEL: test_fmls_ss2S_0_strict:
637; CHECK:       // %bb.0: // %entry
638; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
639; CHECK-NEXT:    fmsub s0, s2, s1, s0
640; CHECK-NEXT:    ret
641entry:
642  %fneg = fneg float %b
643  %extract = extractelement <2 x float> %v, i64 0
644  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %fneg, float %extract, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
645  ret float %0
646}
647
; Strict-FP variant (constrained fma): lane 0 of the <2 x float> %v is the
; first multiplicand and (fneg %b) the second, accumulated onto %a. The
; expected output is a scalar fmsub.
648define float @test_fmls_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
649; CHECK-LABEL: test_fmls_ss2S_0_swap_strict:
650; CHECK:       // %bb.0: // %entry
651; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
652; CHECK-NEXT:    fmsub s0, s2, s1, s0
653; CHECK-NEXT:    ret
654entry:
655  %fneg = fneg float %b
656  %extract = extractelement <2 x float> %v, i64 0
657  %0 = tail call float @llvm.experimental.constrained.fma.f32(float %extract, float %fneg, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
658  ret float %0
659}
660
; Strict-FP variant (constrained fma): both multiplicands derive from lane 1
; of the <2 x float> %v, its fneg first and the lane itself second; %b is
; unused. The expected output copies the lane into s1 and then issues a
; by-element fmls against v2.s[1].
661define float @test_fmls_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
662; CHECK-LABEL: test_fmls_ss2S_1_strict:
663; CHECK:       // %bb.0:
664; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
665; CHECK-NEXT:    mov s1, v2.s[1]
666; CHECK-NEXT:    fmls s0, s1, v2.s[1]
667; CHECK-NEXT:    ret
668  %tmp1 = extractelement <2 x float> %v, i32 1
669  %tmp2 = fneg float %tmp1
670  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
671  ret float %tmp3
672}
673
; Strict-FP variant (constrained fma): (fneg %b) times the only element of the
; <1 x double> %v, plus %a. The expected output is a scalar fmsub.
674define double @test_fmls_ddD_0_strict(double %a, double %b, <1 x double> %v) #0 {
675; CHECK-LABEL: test_fmls_ddD_0_strict:
676; CHECK:       // %bb.0: // %entry
677; CHECK-NEXT:    fmsub d0, d2, d1, d0
678; CHECK-NEXT:    ret
679entry:
680  %fneg = fneg double %b
681  %extract = extractelement <1 x double> %v, i64 0
682  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
683  ret double %0
684}
685
; Strict-FP variant (constrained fma): the only element of the <1 x double> %v
; is the first multiplicand and (fneg %b) the second, accumulated onto %a.
; The expected output is a scalar fmsub.
686define double @test_fmls_ddD_0_swap_strict(double %a, double %b, <1 x double> %v) #0 {
687; CHECK-LABEL: test_fmls_ddD_0_swap_strict:
688; CHECK:       // %bb.0: // %entry
689; CHECK-NEXT:    fmsub d0, d2, d1, d0
690; CHECK-NEXT:    ret
691entry:
692  %fneg = fneg double %b
693  %extract = extractelement <1 x double> %v, i64 0
694  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
695  ret double %0
696}
697
; Strict-FP variant (constrained fma): (fneg %b) times lane 0 of the
; <2 x double> %v, plus %a. The expected output is a scalar fmsub.
698define double @test_fmls_dd2D_0_strict(double %a, double %b, <2 x double> %v) #0 {
699; CHECK-LABEL: test_fmls_dd2D_0_strict:
700; CHECK:       // %bb.0: // %entry
701; CHECK-NEXT:    fmsub d0, d2, d1, d0
702; CHECK-NEXT:    ret
703entry:
704  %fneg = fneg double %b
705  %extract = extractelement <2 x double> %v, i64 0
706  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %fneg, double %extract, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
707  ret double %0
708}
709
; Strict-FP variant (constrained fma): lane 0 of the <2 x double> %v is the
; first multiplicand and (fneg %b) the second, accumulated onto %a. The
; expected output is a scalar fmsub.
710define double @test_fmls_dd2D_0_swap_strict(double %a, double %b, <2 x double> %v) #0 {
711; CHECK-LABEL: test_fmls_dd2D_0_swap_strict:
712; CHECK:       // %bb.0: // %entry
713; CHECK-NEXT:    fmsub d0, d2, d1, d0
714; CHECK-NEXT:    ret
715entry:
716  %fneg = fneg double %b
717  %extract = extractelement <2 x double> %v, i64 0
718  %0 = tail call double @llvm.experimental.constrained.fma.f64(double %extract, double %fneg, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
719  ret double %0
720}
721
; Strict-FP variant (constrained fma): both multiplicands derive from lane 1
; of the <2 x double> %v, its fneg first and the lane itself second; %b is
; unused. The expected output copies the lane into d1 and then issues a
; by-element fmls against v2.d[1].
722define double @test_fmls_dd2D_1_strict(double %a, double %b, <2 x double> %v) #0 {
723; CHECK-LABEL: test_fmls_dd2D_1_strict:
724; CHECK:       // %bb.0:
725; CHECK-NEXT:    mov d1, v2.d[1]
726; CHECK-NEXT:    fmls d0, d1, v2.d[1]
727; CHECK-NEXT:    ret
728  %tmp1 = extractelement <2 x double> %v, i32 1
729  %tmp2 = fneg double %tmp1
730  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
731  ret double %tmp3
732}
733
; Strict-FP variant (constrained fma): both multiplicands derive from lane 1
; of the <2 x double> %v, the lane itself first and its fneg second; %b is
; unused. The expected output copies the lane into d1 and then issues a
; by-element fmls against v2.d[1].
734define double @test_fmls_dd2D_1_swap_strict(double %a, double %b, <2 x double> %v) #0 {
735; CHECK-LABEL: test_fmls_dd2D_1_swap_strict:
736; CHECK:       // %bb.0:
737; CHECK-NEXT:    mov d1, v2.d[1]
738; CHECK-NEXT:    fmls d0, d1, v2.d[1]
739; CHECK-NEXT:    ret
740  %tmp1 = extractelement <2 x double> %v, i32 1
741  %tmp2 = fneg double %tmp1
742  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %tmp2, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
743  ret double %tmp3
744}
745
746