; SOFT:
; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT

; SOFTFP:
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16

; Test fast-isel
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD

; HARD:
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16

; FP-CONTRACT=FAST
; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST

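; A rough summary of the configurations above: CHECK-SOFT expects libcalls for
; everything (__aeabi_h2f, __aeabi_f2h, __aeabi_f*); CHECK-SOFTFP-VFP3 has FP
; registers but no half conversions, so it still expects the h2f/f2h libcalls;
; CHECK-SOFTFP-FP16 expects the vcvtb.f16/f32 conversions with the arithmetic
; done in f32; the FULLFP16 prefixes expect native .f16 instructions. In the IR
; below, half values are modelled as the low 16 bits of an i32/float
; ("%x.coerce"), which is why each function bitcasts and truncates to recover
; the half value.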
define float @RetValBug(float %A.coerce) {
entry:
  ret float undef
; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
; any operands) when FullFP16 is enabled.
;
; CHECK-LABEL:            RetValBug:
; CHECK-HARDFP-FULLFP16:  {{.*}} lr
}

; 1. VABS: TODO

; 2. VADD
define float @Add(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fadd half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL: Add:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fadd
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vadd.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vadd.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vadd.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vadd.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vadd.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vadd.f16  s0, s0, s1
}

; 3. VCMP
define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %cmp = fcmp une half %1, %3
  ret i1 %cmp

; CHECK-LABEL:            VCMP1:

; CHECK-SOFT:             bl  __aeabi_fcmpeq

; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:      vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FP16:      vcmp.f32 s{{.}}, s{{.}}

; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:  vcmp.f16 [[S2]], [[S0]]

; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r0
; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r1
; CHECK-HARDFP-FULLFP16:      vcmp.f16  s0, s1
}

; Check VCMPZH
define zeroext i1 @VCMP2(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %cmp = fcmp une half %1, 0.000000e+00
  ret i1 %cmp

; CHECK-LABEL:             VCMP2:

; CHECK-SOFT:              bl __aeabi_fcmpeq
; CHECK-SOFTFP-FP16:       vcmp.f32        s0, #0
; CHECK-SOFTFP-FULLFP16:   vcmp.f16        s0, #0
; CHECK-HARDFP-FULLFP16:   vcmp.f16        s0, #0
}

; 4. VCMPE
define i32 @VCMPE1(float %F.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %tmp = fcmp olt half %1, 0.000000e+00
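  ; The ordered compare (olt) is expected to select the signalling form
  ; (vcmpe), whereas the unordered (une) compares above select vcmp.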
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL:             VCMPE1:

; CHECK-SOFT:              bl  __aeabi_fcmplt
; CHECK-SOFTFP-FP16:       vcmpe.f32 s0, #0
; CHECK-SOFTFP-FULLFP16:   vcmpe.f16 s0, #0
; CHECK-HARDFP-FULLFP16:   vcmpe.f16 s0, #0
}

define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
entry:
  %0 = bitcast float %F.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %G.coerce to i32
  %tmp.1.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp.1.extract.trunc to half
  %tmp = fcmp olt half %1, %3
  %tmp1 = zext i1 %tmp to i32
  ret i32 %tmp1

; CHECK-LABEL:  VCMPE2:

; CHECK-SOFT:              bl  __aeabi_fcmplt
; CHECK-SOFTFP-FP16:       vcmpe.f32 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16:   vcmpe.f16 s{{.}}, s{{.}}
; CHECK-HARDFP-FULLFP16:   vcmpe.f16 s{{.}}, s{{.}}
}

; Test lowering of BR_CC
define hidden i32 @VCMPBRCC() {
entry:
  %f = alloca half, align 2
  br label %for.cond

for.cond:
  %0 = load half, half* %f, align 2
  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
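  ; 0xH6800 is half-precision 2048.0.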
  br i1 %cmp, label %for.body, label %for.end

for.body:
  ret i32 1

for.end:
  ret i32 0

; CHECK-LABEL:            VCMPBRCC:

; CHECK-SOFT:             bl  __aeabi_fcmple
; CHECK-SOFT:             cmp r0, #0

; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
; CHECK-SOFTFP-FP16:      vcmpe.f32 [[S2]], s0
; CHECK-SOFTFP-FP16:      vmrs  APSR_nzcv, fpscr

; CHECK-SOFTFP-FULLFP16:  vcmpe.f16 s{{.}}, s{{.}}
; CHECK-SOFTFP-FULLFP16:  vmrs  APSR_nzcv, fpscr
}

; 5. VCVT (between floating-point and fixed-point)
; Only assembly/disassembly support

; 6. VCVT (between floating-point and integer, both directions)
define i32 @fptosi(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptosi half %0 to i32
  ret i32 %conv

; CHECK-LABEL:                 fptosi:

; CHECK-HARDFP-FULLFP16:       vmov.f16  s0, r0
; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.s32.f16  s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
}

define i32 @fptoui(i32 %A.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fptoui half %0 to i32
  ret i32 %conv

; CHECK-HARDFP-FULLFP16:       vcvt.u32.f16  s0, s0
; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
}

define float @UintToH(i32 %a, i32 %b) {
entry:
  %0 = uitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL:                 UintToH:

; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.u32  s0, s0
}

define float @SintToH(i32 %a, i32 %b) {
entry:
  %0 = sitofp i32 %a to half
  %1 = bitcast half %0 to i16
  %tmp0.insert.ext = zext i16 %1 to i32
  %2 = bitcast i32 %tmp0.insert.ext to float
  ret float %2

; CHECK-LABEL:                 SintToH:

; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.s32  s0, s0
}

define i32 @f2h(float %f) {
entry:
  %conv = fptrunc float %f to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL:            f2h:
; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f32 s0, s0
}

define float @h2f(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to float
  ret float %conv

; CHECK-LABEL:            h2f:
; CHECK-HARDFP-FULLFP16:  vcvtb.f32.f16 s0, s0
}


define double @h2d(i32 %h.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = fpext half %0 to double
  ret double %conv

; CHECK-LABEL:            h2d:
; CHECK-HARDFP-FULLFP16:  vcvtb.f64.f16 d{{.*}}, s{{.}}
}

define i32 @d2h(double %d) {
entry:
  %conv = fptrunc double %d to half
  %0 = bitcast half %conv to i16
  %tmp.0.insert.ext = zext i16 %0 to i32
  ret i32 %tmp.0.insert.ext

; CHECK-LABEL:            d2h:
; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f64 s0, d{{.*}}
}

; TODO:
; 7.  VCVTA
; 8.  VCVTM
; 9.  VCVTN
; 10. VCVTP
; 11. VCVTR

; 12. VDIV
define float @Div(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fdiv half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:  Div:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fdiv
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vdiv.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vdiv.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vdiv.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vdiv.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vdiv.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vdiv.f16  s0, s0, s1
}

; 13. VFMA
define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %mul, %5
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                      VFMA:
; CHECK-HARDFP-FULLFP16-FAST:       vfma.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
}

; 14. VFMS
define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half %5, %mul
  %6 = bitcast half %sub to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                      VFMS:
; CHECK-HARDFP-FULLFP16-FAST:       vfms.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
}

; 15. VFNMA
define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub = fsub half -0.0, %mul
  %sub2 = fsub half %sub, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                      VFNMA:
; CHECK-HARDFP-FULLFP16-FAST:       vfnma.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
}

; 16. VFNMS
define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %sub2 = fsub half %mul, %5
  %6 = bitcast half %sub2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                      VFNMS:
; CHECK-HARDFP-FULLFP16-FAST:       vfnms.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
}

; TODO:
; 17. VMAXNM
; 18. VMINNM

; 19. VMLA
define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fadd half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                 VMLA:
; CHECK-HARDFP-FULLFP16:       vmla.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
}

; 20. VMLS
define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %mul = fmul half %1, %3
  %add = fsub half %5, %mul
  %6 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:                 VMLS:
; CHECK-HARDFP-FULLFP16:       vmls.f16  s2, s0, s1
; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
}

; TODO: fix immediates.
; 21. VMOV (between general-purpose register and half-precision register)

; 22. VMOV (immediate)
define i32 @movi(i32 %a.coerce) {
entry:
  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %add = fadd half %0, 0xHC000
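  ; 0xHC000 is half-precision -2.0, which is encodable as a VMOV immediate.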
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL:            movi:
; CHECK-HARDFP-FULLFP16:  vmov.f16  s0, #-2.000000e+00
}

; 23. VMUL
define float @Mul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:  Mul:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fmul
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vmul.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmul.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vmul.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vmul.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vmul.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vmul.f16  s0, s0, s1
}

; 24. VNEG
define float @Neg(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = fsub half -0.000000e+00, %1
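  ; Subtracting from -0.0 is the canonical negation pattern and is expected to
  ; select vneg.f16.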
  %3 = bitcast half %2 to i16
  %tmp4.0.insert.ext = zext i16 %3 to i32
  %4 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %4

; CHECK-LABEL:                 Neg:
; CHECK-HARDFP-FULLFP16:       vneg.f16  s0, s0
}

; 25. VNMLA
define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.000000e+00, %add
  %add3 = fsub half %add2, %5
  %6 = bitcast half %add3 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:            VNMLA:
; CHECK-HARDFP-FULLFP16:  vnmla.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
}

; 26. VNMLS
define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %4 = bitcast float %c.coerce to i32
  %tmp2.0.extract.trunc = trunc i32 %4 to i16
  %5 = bitcast i16 %tmp2.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half %add, %5
  %6 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %6 to i32
  %7 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %7

; CHECK-LABEL:            VNMLS:
; CHECK-HARDFP-FULLFP16:  vnmls.f16 s2, s0, s1
; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
}

; 27. VNMUL
define float @NMul(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fmul half %1, %3
  %add2 = fsub half -0.0, %add
  %4 = bitcast half %add2 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:                 NMul:
; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
}

; 28. VRINTA
; 29. VRINTM
; 30. VRINTN
; 31. VRINTP
; 32. VRINTR
; 33. VRINTX
; 34. VRINTZ
; 35. VSELEQ
; 36. VSELGE
; 37. VSELGT
; 38. VSELVS
; 39. VSQRT

; 40. VSUB
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  %add = fsub half %1, %3
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:  Sub:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fsub
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vsub.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vsub.f16  s0, s0, s1
}

; Check for VSTRH with an FCONSTH; this checks that addressing mode
; AddrMode5FP16 is supported.
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  store volatile half 0xH3C00, half* %S, align 2
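  ; 0xH3C00 is half-precision 1.0.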
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL:            ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:  vstr.16     [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vldr.16     [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vadd.f16    s{{.}}, [[S2]], [[S0_2]]
}

; Test function calls to check store/load reg to/from stack
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
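  ; 0xH7C00 is half-precision +infinity. At -O0 its register stays live across
  ; the call to fn2, which produces the 2-byte spill/reload checked below.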
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}]  @ 2-byte Spill
; CHECK-SPILL-RELOAD-NEXT:  bl  fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}]  @ 2-byte Reload
}

declare dso_local i32 @fn2(...)
declare dso_local i32 @fn3(...)