xref: /llvm-project/llvm/test/CodeGen/ARM/fp16-instructions.ll (revision 89ea2648bbdea80193e9da5657db90d411620100)
1; SOFT:
2; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
4
5; SOFTFP:
6; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
7; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
8; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
9
10; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
11; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
12; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
13
14; HARD:
15; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
16; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
17; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
18
19; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
20; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
21; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
22
23; FP-CONTRACT=FAST
24; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
25; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
26
27
; RetValBug: regression test that simply returns an undef float; the incoming
; argument is not used.
28define float @RetValBug(float %A.coerce) {
29entry:
30  ret float undef
31; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
32; any operands) when FullFP16 is enabled.
33;
; The pattern below accepts any text followed by " lr" on one line, so it only
; requires that some lr-based instruction is emitted for the hard-float
; FullFP16 configuration.
34; CHECK-LABEL:            RetValBug:
35; CHECK-HARDFP-FULLFP16:  {{.*}} lr
36}
37
38; 1. VABS: TODO
39
40; 2. VADD
; Add: half-precision addition. Each float argument is bitcast to i32,
; truncated to i16 and bitcast to half; the half sum is zero-extended back to
; i32 and returned bitcast to float.
41define float @Add(float %a.coerce, float %b.coerce) {
42entry:
43  %0 = bitcast float %a.coerce to i32
44  %tmp.0.extract.trunc = trunc i32 %0 to i16
45  %1 = bitcast i16 %tmp.0.extract.trunc to half
46  %2 = bitcast float %b.coerce to i32
47  %tmp1.0.extract.trunc = trunc i32 %2 to i16
48  %3 = bitcast i16 %tmp1.0.extract.trunc to half
49  %add = fadd half %1, %3
50  %4 = bitcast half %add to i16
51  %tmp4.0.insert.ext = zext i16 %4 to i32
52  %5 = bitcast i32 %tmp4.0.insert.ext to float
53  ret float %5
54
55; CHECK-LABEL: Add:
56
; Soft float: both operands are widened via library calls, added as f32 and
; narrowed again.
57; CHECK-SOFT:  bl  __aeabi_h2f
58; CHECK-SOFT:  bl  __aeabi_h2f
59; CHECK-SOFT:  bl  __aeabi_fadd
60; CHECK-SOFT:  bl  __aeabi_f2h
61
; VFP3: the conversions are still library calls, only the add is a VFP
; instruction.
62; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
63; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
64; CHECK-SOFTFP-VFP3:  vadd.f32
65; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
66
; VFP4: conversions use vcvtb, arithmetic is done in f32.
67; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
68; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
69; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
70; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
71; CHECK-SOFTFP-FP16:  vadd.f32      [[S0]], [[S0]], [[S2]]
72; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
73; CHECK-SOFTFP-FP16:  vmov  r0, s0
74
; FullFP16: a native f16 add, with operands moved in from r0/r1.
75; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
76; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
77; CHECK-SOFTFP-FULLFP16:       vadd.f16  [[S0]], [[S2]], [[S0]]
78; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
79
80; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
81; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
82; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
83; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
84; CHECK-HARDFP-VFP3:  vadd.f32
85; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
86; CHECK-HARDFP-VFP3:  vmov  s0, r0
87
88; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
89; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
90; CHECK-HARDFP-FP16:  vadd.f32  [[S0]], [[S0]], [[S2]]
91; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
92
93; CHECK-HARDFP-FULLFP16:       vadd.f16  s0, s0, s1
94}
95
96; 3. VCMP
; VCMP: ordered greater-than comparison of two halves. Each float argument is
; bitcast to i32, truncated to i16 and bitcast to half; the i1 result of the
; fcmp is returned directly (zeroext).
97define zeroext i1 @VCMP(float %F.coerce, float %G.coerce) {
98entry:
99  %0 = bitcast float %F.coerce to i32
100  %tmp.0.extract.trunc = trunc i32 %0 to i16
101  %1 = bitcast i16 %tmp.0.extract.trunc to half
102  %2 = bitcast float %G.coerce to i32
103  %tmp1.0.extract.trunc = trunc i32 %2 to i16
104  %3 = bitcast i16 %tmp1.0.extract.trunc to half
105  %cmp = fcmp ogt half %1, %3
106  ret i1 %cmp
107
108; CHECK-LABEL:            VCMP:
109
110; CHECK-SOFT:             bl  __aeabi_fcmpgt
111
112; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
113; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
114; CHECK-SOFTFP-VFP3:      vcmpe.f32 s{{.}}, s{{.}}
115
116; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
117; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
118; CHECK-SOFTFP-FP16:      vcmpe.f32 s{{.}}, s{{.}}
119
120; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S2:s[0-9]]], r0
121; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0:s[0-9]]], r1
122; CHECK-SOFTFP-FULLFP16:  vcmpe.f16 [[S2]], [[S0]]
123
; Hard float: no half may be moved in from r0/r1 before the compare. These
; -NOT directives use the HARDFP prefix so that they constrain the region
; between the label and the hard-float vcmpe.f16 below.
124; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r0
125; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r1
126; CHECK-HARDFP-FULLFP16:      vcmpe.f16  s0, s1
127}
128
129; 4. VCMPE
130
131; FIXME: enable when constant pool is fixed
132;
133;define i32 @VCMPE_IMM(float %F.coerce) {
134;entry:
135;  %0 = bitcast float %F.coerce to i32
136;  %tmp.0.extract.trunc = trunc i32 %0 to i16
137;  %1 = bitcast i16 %tmp.0.extract.trunc to half
138;  %tmp = fcmp olt half %1, 1.000000e+00
139;  %tmp1 = zext i1 %tmp to i32
140;  ret i32 %tmp1
141;}
142
; VCMPE: ordered less-than comparison of two halves, with the i1 result
; zero-extended to i32. Each float argument is bitcast to i32, truncated to
; i16 and bitcast to half.
; NOTE(review): only the function label is checked below; no instruction
; pattern is verified for any configuration.
143define i32 @VCMPE(float %F.coerce, float %G.coerce) {
144entry:
145  %0 = bitcast float %F.coerce to i32
146  %tmp.0.extract.trunc = trunc i32 %0 to i16
147  %1 = bitcast i16 %tmp.0.extract.trunc to half
148  %2 = bitcast float %G.coerce to i32
149  %tmp.1.extract.trunc = trunc i32 %2 to i16
150  %3 = bitcast i16 %tmp.1.extract.trunc to half
151  %tmp = fcmp olt half %1, %3
152  %tmp1 = zext i1 %tmp to i32
153  ret i32 %tmp1
154
155; CHECK-LABEL:  VCMPE:
156}
157
158; 5. VCVT (between floating-point and fixed-point)
159; Only assembly/disassembly support
160
161; 6. VCVT (between floating-point and integer, both directions)
; fptosi: converts a half to a signed i32. The half is taken from the low 16
; bits of the i32 argument (trunc to i16, then bitcast).
162define i32 @fptosi(i32 %A.coerce) {
163entry:
164  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
165  %0 = bitcast i16 %tmp.0.extract.trunc to half
166  %conv = fptosi half %0 to i32
167  ret i32 %conv
168
169; CHECK-LABEL:                 fptosi:
170
; Only the hard-float FullFP16 configuration is checked: move in from r0,
; convert, move the result back out, on consecutive lines.
171; CHECK-HARDFP-FULLFP16:       vmov.f16  s0, r0
172; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.s32.f16  s0, s0
173; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
174}
175
; fptoui: converts a half to an unsigned i32. The half is taken from the low
; 16 bits of the i32 argument (trunc to i16, then bitcast).
176define i32 @fptoui(i32 %A.coerce) {
177entry:
178  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
179  %0 = bitcast i16 %tmp.0.extract.trunc to half
180  %conv = fptoui half %0 to i32
181  ret i32 %conv
182
; Anchor the checks to this function's own output; without a label they could
; match inside the output of a neighbouring function.
; CHECK-LABEL:                 fptoui:

; Only the hard-float FullFP16 configuration is checked.
183; CHECK-HARDFP-FULLFP16:       vcvt.u32.f16  s0, s0
184; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
185}
186
; UintToH: converts the unsigned i32 %a to half, then zero-extends the half's
; bits to i32 and returns them bitcast to float. %b is not used.
187define float @UintToH(i32 %a, i32 %b) {
188entry:
189  %0 = uitofp i32 %a to half
190  %1 = bitcast half %0 to i16
191  %tmp0.insert.ext = zext i16 %1 to i32
192  %2 = bitcast i32 %tmp0.insert.ext to float
193  ret float %2
194
195; CHECK-LABEL:                 UintToH:
196
; Only the hard-float FullFP16 configuration is checked.
197; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
198; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.u32  s0, s0
199}
200
; SintToH: converts the signed i32 %a to half, then zero-extends the half's
; bits to i32 and returns them bitcast to float. %b is not used.
201define float @SintToH(i32 %a, i32 %b) {
202entry:
203  %0 = sitofp i32 %a to half
204  %1 = bitcast half %0 to i16
205  %tmp0.insert.ext = zext i16 %1 to i32
206  %2 = bitcast i32 %tmp0.insert.ext to float
207  ret float %2
208
209; CHECK-LABEL:                 SintToH:
210
; Only the hard-float FullFP16 configuration is checked.
211; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
212; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.s32  s0, s0
213}
214
215; TODO:
216; 7.  VCVTA
217; 8.  VCVTM
218; 9.  VCVTN
219; 10. VCVTP
220; 11. VCVTR
221
222; 12. VDIV
; Div: half-precision division (%a / %b). Each float argument is bitcast to
; i32, truncated to i16 and bitcast to half; the half quotient is
; zero-extended back to i32 and returned bitcast to float.
; Note: the quotient is held in a value named %add; the name does not reflect
; the operation.
223define float @Div(float %a.coerce, float %b.coerce) {
224entry:
225  %0 = bitcast float %a.coerce to i32
226  %tmp.0.extract.trunc = trunc i32 %0 to i16
227  %1 = bitcast i16 %tmp.0.extract.trunc to half
228  %2 = bitcast float %b.coerce to i32
229  %tmp1.0.extract.trunc = trunc i32 %2 to i16
230  %3 = bitcast i16 %tmp1.0.extract.trunc to half
231  %add = fdiv half %1, %3
232  %4 = bitcast half %add to i16
233  %tmp4.0.insert.ext = zext i16 %4 to i32
234  %5 = bitcast i32 %tmp4.0.insert.ext to float
235  ret float %5
236
237; CHECK-LABEL:  Div:
238
; Soft float: library calls for both the conversions and the divide.
239; CHECK-SOFT:  bl  __aeabi_h2f
240; CHECK-SOFT:  bl  __aeabi_h2f
241; CHECK-SOFT:  bl  __aeabi_fdiv
242; CHECK-SOFT:  bl  __aeabi_f2h
243
244; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
245; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
246; CHECK-SOFTFP-VFP3:  vdiv.f32
247; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
248
249; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
250; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
251; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
252; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
253; CHECK-SOFTFP-FP16:  vdiv.f32      [[S0]], [[S0]], [[S2]]
254; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
255; CHECK-SOFTFP-FP16:  vmov  r0, s0
256
; FullFP16: a native f16 divide; the register loaded from r0 is the dividend.
257; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
258; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
259; CHECK-SOFTFP-FULLFP16:       vdiv.f16  [[S0]], [[S2]], [[S0]]
260; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
261
262; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
263; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
264; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
265; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
266; CHECK-HARDFP-VFP3:  vdiv.f32
267; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
268; CHECK-HARDFP-VFP3:  vmov  s0, r0
269
270; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
271; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
272; CHECK-HARDFP-FP16:  vdiv.f32  [[S0]], [[S0]], [[S2]]
273; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
274
275; CHECK-HARDFP-FULLFP16:       vdiv.f16  s0, s0, s1
276}
277
278; 13. VFMA
; VFMA: computes (a * b) + c on halves as a separate fmul and fadd. Each float
; argument is bitcast to i32, truncated to i16 and bitcast to half; the result
; is zero-extended to i32 and returned bitcast to float.
; Only the -fp-contract=fast RUN lines (prefix CHECK-HARDFP-FULLFP16-FAST)
; check the body; they expect a single vfma.f16.
279define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
280entry:
281  %0 = bitcast float %a.coerce to i32
282  %tmp.0.extract.trunc = trunc i32 %0 to i16
283  %1 = bitcast i16 %tmp.0.extract.trunc to half
284  %2 = bitcast float %b.coerce to i32
285  %tmp1.0.extract.trunc = trunc i32 %2 to i16
286  %3 = bitcast i16 %tmp1.0.extract.trunc to half
287  %4 = bitcast float %c.coerce to i32
288  %tmp2.0.extract.trunc = trunc i32 %4 to i16
289  %5 = bitcast i16 %tmp2.0.extract.trunc to half
290  %mul = fmul half %1, %3
291  %add = fadd half %mul, %5
292  %6 = bitcast half %add to i16
293  %tmp4.0.insert.ext = zext i16 %6 to i32
294  %7 = bitcast i32 %tmp4.0.insert.ext to float
295  ret float %7
296
297; CHECK-LABEL:                      VFMA:
298; CHECK-HARDFP-FULLFP16-FAST:       vfma.f16  s2, s0, s1
299; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
300}
301
302; 14. VFMS
; VFMS: computes c - (a * b) on halves as a separate fmul and fsub. Each float
; argument is bitcast to i32, truncated to i16 and bitcast to half; the result
; is zero-extended to i32 and returned bitcast to float.
; Only the -fp-contract=fast RUN lines (prefix CHECK-HARDFP-FULLFP16-FAST)
; check the body; they expect a single vfms.f16.
303define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
304entry:
305  %0 = bitcast float %a.coerce to i32
306  %tmp.0.extract.trunc = trunc i32 %0 to i16
307  %1 = bitcast i16 %tmp.0.extract.trunc to half
308  %2 = bitcast float %b.coerce to i32
309  %tmp1.0.extract.trunc = trunc i32 %2 to i16
310  %3 = bitcast i16 %tmp1.0.extract.trunc to half
311  %4 = bitcast float %c.coerce to i32
312  %tmp2.0.extract.trunc = trunc i32 %4 to i16
313  %5 = bitcast i16 %tmp2.0.extract.trunc to half
314  %mul = fmul half %1, %3
315  %sub = fsub half %5, %mul
316  %6 = bitcast half %sub to i16
317  %tmp4.0.insert.ext = zext i16 %6 to i32
318  %7 = bitcast i32 %tmp4.0.insert.ext to float
319  ret float %7
320
321; CHECK-LABEL:                      VFMS:
322; CHECK-HARDFP-FULLFP16-FAST:       vfms.f16  s2, s0, s1
323; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
324}
325
326; 15. VFNMA
; VFNMA: computes (-0.0 - (a * b)) - c on halves, i.e. the negated product
; minus c. Each float argument is bitcast to i32, truncated to i16 and bitcast
; to half; the result is zero-extended to i32 and returned bitcast to float.
; Only the -fp-contract=fast RUN lines (prefix CHECK-HARDFP-FULLFP16-FAST)
; check the body; they expect a single vfnma.f16.
327define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
328entry:
329  %0 = bitcast float %a.coerce to i32
330  %tmp.0.extract.trunc = trunc i32 %0 to i16
331  %1 = bitcast i16 %tmp.0.extract.trunc to half
332  %2 = bitcast float %b.coerce to i32
333  %tmp1.0.extract.trunc = trunc i32 %2 to i16
334  %3 = bitcast i16 %tmp1.0.extract.trunc to half
335  %4 = bitcast float %c.coerce to i32
336  %tmp2.0.extract.trunc = trunc i32 %4 to i16
337  %5 = bitcast i16 %tmp2.0.extract.trunc to half
338  %mul = fmul half %1, %3
339  %sub = fsub half -0.0, %mul
340  %sub2 = fsub half %sub, %5
341  %6 = bitcast half %sub2 to i16
342  %tmp4.0.insert.ext = zext i16 %6 to i32
343  %7 = bitcast i32 %tmp4.0.insert.ext to float
344  ret float %7
345
346; CHECK-LABEL:                      VFNMA:
347; CHECK-HARDFP-FULLFP16-FAST:       vfnma.f16  s2, s0, s1
348; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
349}
350
351; 16. VFNMS
; VFNMS: computes (a * b) - c on halves as a separate fmul and fsub. Each
; float argument is bitcast to i32, truncated to i16 and bitcast to half; the
; result is zero-extended to i32 and returned bitcast to float.
; Only the -fp-contract=fast RUN lines (prefix CHECK-HARDFP-FULLFP16-FAST)
; check the body; they expect a single vfnms.f16.
352define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
353entry:
354  %0 = bitcast float %a.coerce to i32
355  %tmp.0.extract.trunc = trunc i32 %0 to i16
356  %1 = bitcast i16 %tmp.0.extract.trunc to half
357  %2 = bitcast float %b.coerce to i32
358  %tmp1.0.extract.trunc = trunc i32 %2 to i16
359  %3 = bitcast i16 %tmp1.0.extract.trunc to half
360  %4 = bitcast float %c.coerce to i32
361  %tmp2.0.extract.trunc = trunc i32 %4 to i16
362  %5 = bitcast i16 %tmp2.0.extract.trunc to half
363  %mul = fmul half %1, %3
364  %sub2 = fsub half %mul, %5
365  %6 = bitcast half %sub2 to i16
366  %tmp4.0.insert.ext = zext i16 %6 to i32
367  %7 = bitcast i32 %tmp4.0.insert.ext to float
368  ret float %7
369
370; CHECK-LABEL:                      VFNMS:
371; CHECK-HARDFP-FULLFP16-FAST:       vfnms.f16  s2, s0, s1
372; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
373}
374
375; TODO:
376; 17. VMAXNM
377; 18. VMINNM
378
379; 19. VMLA
; VMLA: computes c + (a * b) on halves as a separate fmul and fadd. Each float
; argument is bitcast to i32, truncated to i16 and bitcast to half; the result
; is zero-extended to i32 and returned bitcast to float.
; Checked for the hard-float FullFP16 RUN lines that do not pass
; -fp-contract=fast; they expect vmla.f16.
380define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
381entry:
382  %0 = bitcast float %a.coerce to i32
383  %tmp.0.extract.trunc = trunc i32 %0 to i16
384  %1 = bitcast i16 %tmp.0.extract.trunc to half
385  %2 = bitcast float %b.coerce to i32
386  %tmp1.0.extract.trunc = trunc i32 %2 to i16
387  %3 = bitcast i16 %tmp1.0.extract.trunc to half
388  %4 = bitcast float %c.coerce to i32
389  %tmp2.0.extract.trunc = trunc i32 %4 to i16
390  %5 = bitcast i16 %tmp2.0.extract.trunc to half
391  %mul = fmul half %1, %3
392  %add = fadd half %5, %mul
393  %6 = bitcast half %add to i16
394  %tmp4.0.insert.ext = zext i16 %6 to i32
395  %7 = bitcast i32 %tmp4.0.insert.ext to float
396  ret float %7
397
398; CHECK-LABEL:                 VMLA:
399; CHECK-HARDFP-FULLFP16:       vmla.f16  s2, s0, s1
400; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
401}
402
403; 20. VMLS
; VMLS: computes c - (a * b) on halves as a separate fmul and fsub. Each float
; argument is bitcast to i32, truncated to i16 and bitcast to half; the result
; is zero-extended to i32 and returned bitcast to float.
; Checked for the hard-float FullFP16 RUN lines that do not pass
; -fp-contract=fast; they expect vmls.f16.
; Note: the difference is held in a value named %add.
404define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
405entry:
406  %0 = bitcast float %a.coerce to i32
407  %tmp.0.extract.trunc = trunc i32 %0 to i16
408  %1 = bitcast i16 %tmp.0.extract.trunc to half
409  %2 = bitcast float %b.coerce to i32
410  %tmp1.0.extract.trunc = trunc i32 %2 to i16
411  %3 = bitcast i16 %tmp1.0.extract.trunc to half
412  %4 = bitcast float %c.coerce to i32
413  %tmp2.0.extract.trunc = trunc i32 %4 to i16
414  %5 = bitcast i16 %tmp2.0.extract.trunc to half
415  %mul = fmul half %1, %3
416  %add = fsub half %5, %mul
417  %6 = bitcast half %add to i16
418  %tmp4.0.insert.ext = zext i16 %6 to i32
419  %7 = bitcast i32 %tmp4.0.insert.ext to float
420  ret float %7
421
422; CHECK-LABEL:                 VMLS:
423; CHECK-HARDFP-FULLFP16:       vmls.f16  s2, s0, s1
424; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
425}
426
427; TODO: fix immediates.
428; 21. VMOV (between general-purpose register and half-precision register)
429; 22. VMOV (immediate)
430
431; 23. VMUL
; Mul: half-precision multiplication. Each float argument is bitcast to i32,
; truncated to i16 and bitcast to half; the half product is zero-extended back
; to i32 and returned bitcast to float.
; Note: the product is held in a value named %add; the name does not reflect
; the operation.
432define float @Mul(float %a.coerce, float %b.coerce) {
433entry:
434  %0 = bitcast float %a.coerce to i32
435  %tmp.0.extract.trunc = trunc i32 %0 to i16
436  %1 = bitcast i16 %tmp.0.extract.trunc to half
437  %2 = bitcast float %b.coerce to i32
438  %tmp1.0.extract.trunc = trunc i32 %2 to i16
439  %3 = bitcast i16 %tmp1.0.extract.trunc to half
440  %add = fmul half %1, %3
441  %4 = bitcast half %add to i16
442  %tmp4.0.insert.ext = zext i16 %4 to i32
443  %5 = bitcast i32 %tmp4.0.insert.ext to float
444  ret float %5
445
446; CHECK-LABEL:  Mul:
447
; Soft float: library calls for both the conversions and the multiply.
448; CHECK-SOFT:  bl  __aeabi_h2f
449; CHECK-SOFT:  bl  __aeabi_h2f
450; CHECK-SOFT:  bl  __aeabi_fmul
451; CHECK-SOFT:  bl  __aeabi_f2h
452
453; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
454; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
455; CHECK-SOFTFP-VFP3:  vmul.f32
456; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
457
458; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
459; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
460; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
461; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
462; CHECK-SOFTFP-FP16:  vmul.f32      [[S0]], [[S0]], [[S2]]
463; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
464; CHECK-SOFTFP-FP16:  vmov  r0, s0
465
; FullFP16: a native f16 multiply, with operands moved in from r0/r1.
466; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
467; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
468; CHECK-SOFTFP-FULLFP16:       vmul.f16  [[S0]], [[S2]], [[S0]]
469; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
470
471; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
472; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
473; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
474; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
475; CHECK-HARDFP-VFP3:  vmul.f32
476; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
477; CHECK-HARDFP-VFP3:  vmov  s0, r0
478
479; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
480; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
481; CHECK-HARDFP-FP16:  vmul.f32  [[S0]], [[S0]], [[S2]]
482; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
483
484; CHECK-HARDFP-FULLFP16:       vmul.f16  s0, s0, s1
485}
486
487; 24. VNEG
; Neg: negates a half, written as -0.0 - x. The float argument is bitcast to
; i32, truncated to i16 and bitcast to half; the result is zero-extended to
; i32 and returned bitcast to float.
; Only the hard-float FullFP16 configuration is checked.
488define float @Neg(float %a.coerce) {
489entry:
490  %0 = bitcast float %a.coerce to i32
491  %tmp.0.extract.trunc = trunc i32 %0 to i16
492  %1 = bitcast i16 %tmp.0.extract.trunc to half
493  %2 = fsub half -0.000000e+00, %1
494  %3 = bitcast half %2 to i16
495  %tmp4.0.insert.ext = zext i16 %3 to i32
496  %4 = bitcast i32 %tmp4.0.insert.ext to float
497  ret float %4
498
499; CHECK-LABEL:                 Neg:
500; CHECK-HARDFP-FULLFP16:       vneg.f16  s0, s0
501}
502
503; 25. VNMLA
; VNMLA: computes (-0.0 - (a * b)) - c on halves. Each float argument is
; bitcast to i32, truncated to i16 and bitcast to half; the result is
; zero-extended to i32 and returned bitcast to float.
; Checked for the hard-float FullFP16 RUN lines that do not pass
; -fp-contract=fast; they expect vnmla.f16.
; Note: the intermediate values are named %add/%add2/%add3 although they hold
; a product and two differences. The second check below is a plain match
; rather than -NEXT, unlike the VMLA/VMLS tests.
504define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
505entry:
506  %0 = bitcast float %a.coerce to i32
507  %tmp.0.extract.trunc = trunc i32 %0 to i16
508  %1 = bitcast i16 %tmp.0.extract.trunc to half
509  %2 = bitcast float %b.coerce to i32
510  %tmp1.0.extract.trunc = trunc i32 %2 to i16
511  %3 = bitcast i16 %tmp1.0.extract.trunc to half
512  %4 = bitcast float %c.coerce to i32
513  %tmp2.0.extract.trunc = trunc i32 %4 to i16
514  %5 = bitcast i16 %tmp2.0.extract.trunc to half
515  %add = fmul half %1, %3
516  %add2 = fsub half -0.000000e+00, %add
517  %add3 = fsub half %add2, %5
518  %6 = bitcast half %add3 to i16
519  %tmp4.0.insert.ext = zext i16 %6 to i32
520  %7 = bitcast i32 %tmp4.0.insert.ext to float
521  ret float %7
522
523; CHECK-LABEL:            VNMLA:
524; CHECK-HARDFP-FULLFP16:  vnmla.f16 s2, s0, s1
525; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
526}
527
528; 26. VNMLS
; VNMLS: computes (a * b) - c on halves as a separate fmul and fsub. Each
; float argument is bitcast to i32, truncated to i16 and bitcast to half; the
; result is zero-extended to i32 and returned bitcast to float.
; Checked for the hard-float FullFP16 RUN lines that do not pass
; -fp-contract=fast; they expect vnmls.f16.
; Note: the intermediate values are named %add/%add2 although they hold a
; product and a difference. The second check below is a plain match rather
; than -NEXT, unlike the VMLA/VMLS tests.
529define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
530entry:
531  %0 = bitcast float %a.coerce to i32
532  %tmp.0.extract.trunc = trunc i32 %0 to i16
533  %1 = bitcast i16 %tmp.0.extract.trunc to half
534  %2 = bitcast float %b.coerce to i32
535  %tmp1.0.extract.trunc = trunc i32 %2 to i16
536  %3 = bitcast i16 %tmp1.0.extract.trunc to half
537  %4 = bitcast float %c.coerce to i32
538  %tmp2.0.extract.trunc = trunc i32 %4 to i16
539  %5 = bitcast i16 %tmp2.0.extract.trunc to half
540  %add = fmul half %1, %3
541  %add2 = fsub half %add, %5
542  %6 = bitcast half %add2 to i16
543  %tmp4.0.insert.ext = zext i16 %6 to i32
544  %7 = bitcast i32 %tmp4.0.insert.ext to float
545  ret float %7
546
547; CHECK-LABEL:            VNMLS:
548; CHECK-HARDFP-FULLFP16:  vnmls.f16 s2, s0, s1
549; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
550}
551
552; 27. VNMUL
; NMul: computes -0.0 - (a * b) on halves, i.e. the negated product. Each
; float argument is bitcast to i32, truncated to i16 and bitcast to half; the
; result is zero-extended to i32 and returned bitcast to float.
; Only the hard-float FullFP16 configuration is checked; it expects a single
; vnmul.f16.
; Note: the product and its negation are held in values named %add and %add2.
553define float @NMul(float %a.coerce, float %b.coerce) {
554entry:
555  %0 = bitcast float %a.coerce to i32
556  %tmp.0.extract.trunc = trunc i32 %0 to i16
557  %1 = bitcast i16 %tmp.0.extract.trunc to half
558  %2 = bitcast float %b.coerce to i32
559  %tmp1.0.extract.trunc = trunc i32 %2 to i16
560  %3 = bitcast i16 %tmp1.0.extract.trunc to half
561  %add = fmul half %1, %3
562  %add2 = fsub half -0.0, %add
563  %4 = bitcast half %add2 to i16
564  %tmp4.0.insert.ext = zext i16 %4 to i32
565  %5 = bitcast i32 %tmp4.0.insert.ext to float
566  ret float %5
567
568; CHECK-LABEL:                 NMul:
569; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
570}
571
572; 28. VRINTA
573; 29. VRINTM
574; 30. VRINTN
575; 31. VRINTP
576; 32. VRINTR
577; 33. VRINTX
578; 34. VRINTZ
579; 35. VSELEQ
580; 36. VSELGE
581; 37. VSELGT
582; 38. VSELVS
583; 39. VSQRT
584
585; 40. VSUB
; Sub: half-precision subtraction (%a - %b). Each float argument is bitcast to
; i32, truncated to i16 and bitcast to half; the half difference is
; zero-extended back to i32 and returned bitcast to float.
; Note: the difference is held in a value named %add; the name does not
; reflect the operation.
586define float @Sub(float %a.coerce, float %b.coerce) {
587entry:
588  %0 = bitcast float %a.coerce to i32
589  %tmp.0.extract.trunc = trunc i32 %0 to i16
590  %1 = bitcast i16 %tmp.0.extract.trunc to half
591  %2 = bitcast float %b.coerce to i32
592  %tmp1.0.extract.trunc = trunc i32 %2 to i16
593  %3 = bitcast i16 %tmp1.0.extract.trunc to half
594  %add = fsub half %1, %3
595  %4 = bitcast half %add to i16
596  %tmp4.0.insert.ext = zext i16 %4 to i32
597  %5 = bitcast i32 %tmp4.0.insert.ext to float
598  ret float %5
599
600; CHECK-LABEL:  Sub:
601
; Soft float: library calls for both the conversions and the subtract.
602; CHECK-SOFT:  bl  __aeabi_h2f
603; CHECK-SOFT:  bl  __aeabi_h2f
604; CHECK-SOFT:  bl  __aeabi_fsub
605; CHECK-SOFT:  bl  __aeabi_f2h
606
607; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
608; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
609; CHECK-SOFTFP-VFP3:  vsub.f32
610; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
611
612; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
613; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
614; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
615; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
616; CHECK-SOFTFP-FP16:  vsub.f32      [[S0]], [[S0]], [[S2]]
617; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
618; CHECK-SOFTFP-FP16:  vmov  r0, s0
619
; FullFP16: a native f16 subtract; the register loaded from r0 is the minuend.
620; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
621; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
622; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
623; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
624
625; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
626; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
627; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
628; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
629; CHECK-HARDFP-VFP3:  vsub.f32
630; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
631; CHECK-HARDFP-VFP3:  vmov  s0, r0
632
633; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
634; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
635; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
636; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
637
638; CHECK-HARDFP-FULLFP16:       vsub.f16  s0, s0, s1
639}
640