xref: /llvm-project/llvm/test/CodeGen/ARM/fp16-instructions.ll (revision 21542cd6f4c62004dfa24b56766d9cd135eb98cd)
1; SOFT:
2; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
3; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
4
5; SOFTFP:
6; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
7; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-A32
8; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
9
10; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
11; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FP16,CHECK-SOFTFP-FP16-T32
12; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-FULLFP16
13
14; Test fast-isel
15; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
16; RUN: llc < %s -mtriple=thumbv7-none-eabi -mattr=+fullfp16 -O0 | FileCheck %s --check-prefixes=CHECK-SPILL-RELOAD
17
18; HARD:
19; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
20; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
21; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
22
23; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-VFP3
24; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FP16
25; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16
26
27; FP-CONTRACT=FAST
28; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
29; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
30
31; TODO: we can't pass half-precision arguments as "half" types yet. We do
32; that for the time being by passing "float %f.coerce" and the necessary
33; bitconverts/truncates. But when we can pass half types, we do want to use
34; and test that here.
35
; 1. Return of 'undef': regression test for return-value lowering.
36define float @RetValBug(float %A.coerce) {
37entry:
38  ret float undef
39; Check that LowerReturn can handle undef nodes (i.e. nodes which do not have
40; any operands) when FullFP16 is enabled.
41;
42; CHECK-LABEL:            RetValBug:
43; CHECK-HARDFP-FULLFP16:  {{.*}} lr
44}
45
46; 2. VADD
; Half-precision add. The half operands arrive coerced as the low 16 bits of
; 'float' arguments (bitcast to i32, truncated to i16, bitcast to half), and
; the half result is widened back the same way for the return.
47define float @Add(float %a.coerce, float %b.coerce) {
48entry:
49  %0 = bitcast float %a.coerce to i32
50  %tmp.0.extract.trunc = trunc i32 %0 to i16
51  %1 = bitcast i16 %tmp.0.extract.trunc to half
52  %2 = bitcast float %b.coerce to i32
53  %tmp1.0.extract.trunc = trunc i32 %2 to i16
54  %3 = bitcast i16 %tmp1.0.extract.trunc to half
55  %add = fadd half %1, %3
56  %4 = bitcast half %add to i16
57  %tmp4.0.insert.ext = zext i16 %4 to i32
58  %5 = bitcast i32 %tmp4.0.insert.ext to float
59  ret float %5
60
61; CHECK-LABEL: Add:
62
63; CHECK-SOFT:  bl  __aeabi_h2f
64; CHECK-SOFT:  bl  __aeabi_h2f
65; CHECK-SOFT:  bl  __aeabi_fadd
66; CHECK-SOFT:  bl  __aeabi_f2h
67
68; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
69; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
70; CHECK-SOFTFP-VFP3:  vadd.f32
71; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
72
73; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
74; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
75; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
76; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
77; CHECK-SOFTFP-FP16:  vadd.f32      [[S0]], [[S0]], [[S2]]
78; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
79; CHECK-SOFTFP-FP16:  vmov  r0, s0
80
81; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
82; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
83; CHECK-SOFTFP-FULLFP16:       vadd.f16  [[S0]], [[S2]], [[S0]]
84; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
85
86; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
87; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
88; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
89; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
90; CHECK-HARDFP-VFP3:  vadd.f32
91; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
92; CHECK-HARDFP-VFP3:  vmov  s0, r0
93
94; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
95; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
96; CHECK-HARDFP-FP16:  vadd.f32  [[S0]], [[S0]], [[S2]]
97; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
98
99; CHECK-HARDFP-FULLFP16:       vadd.f16  s0, s0, s1
100}
101
102; 3. VCMP
; Unordered-not-equal compare of two coerced half arguments; checks the
; compare lowering (libcall, f32 vcmp after widening, or native vcmp.f16).
103define zeroext i1 @VCMP1(float %F.coerce, float %G.coerce) {
104entry:
105  %0 = bitcast float %F.coerce to i32
106  %tmp.0.extract.trunc = trunc i32 %0 to i16
107  %1 = bitcast i16 %tmp.0.extract.trunc to half
108  %2 = bitcast float %G.coerce to i32
109  %tmp1.0.extract.trunc = trunc i32 %2 to i16
110  %3 = bitcast i16 %tmp1.0.extract.trunc to half
111  %cmp = fcmp une half %1, %3
112  ret i1 %cmp
113
114; CHECK-LABEL:            VCMP1:
115
116; CHECK-SOFT:             bl  __aeabi_fcmpeq
117
118; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
119; CHECK-SOFTFP-VFP3:      bl  __aeabi_h2f
120; CHECK-SOFTFP-VFP3:      vcmp.f32 s{{.}}, s{{.}}
121
122; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
123; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 s{{.}}, s{{.}}
124; CHECK-SOFTFP-FP16:      vcmp.f32 s{{.}}, s{{.}}
125
126; CHECK-SOFTFP-FULLFP16:  vmov.f16  [[S2:s[0-9]]], r0
127; CHECK-SOFTFP-FULLFP16:  vmov.f16 [[S0:s[0-9]]], r1
128; CHECK-SOFTFP-FULLFP16:  vcmp.f16 [[S2]], [[S0]]
129
130; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r0
131; CHECK-HARDFP-FULLFP16-NOT:  vmov.f16  s{{.}}, r1
132; CHECK-HARDFP-FULLFP16:      vcmp.f16  s0, s1
133}
134
135; Check VCMPZH
; Compare a coerced half argument against +0.0 (une): the compare-with-zero
; form should be selected (vcmp ..., #0).
136define zeroext i1 @VCMP2(float %F.coerce) {
137entry:
138  %0 = bitcast float %F.coerce to i32
139  %tmp.0.extract.trunc = trunc i32 %0 to i16
140  %1 = bitcast i16 %tmp.0.extract.trunc to half
141  %cmp = fcmp une half %1, 0.000000e+00
142  ret i1 %cmp
143
144; CHECK-LABEL:             VCMP2:
145
146; CHECK-SOFT:              bl __aeabi_fcmpeq
147; CHECK-SOFTFP-FP16:       vcmp.f32        s0, #0
148; CHECK-SOFTFP-FULLFP16:   vcmp.f16        s0, #0
149; CHECK-HARDFP-FULLFP16:   vcmp.f16        s0, #0
150}
151
152; 4. VCMPE
; Ordered less-than against 0.0: expects the signalling compare-with-zero
; form (vcmpe ..., #0).
153define i32 @VCMPE1(float %F.coerce) {
154entry:
155  %0 = bitcast float %F.coerce to i32
156  %tmp.0.extract.trunc = trunc i32 %0 to i16
157  %1 = bitcast i16 %tmp.0.extract.trunc to half
158  %tmp = fcmp olt half %1, 0.000000e+00
159  %tmp1 = zext i1 %tmp to i32
160  ret i32 %tmp1
161
162; CHECK-LABEL:             VCMPE1:
163
164; CHECK-SOFT:              bl  __aeabi_fcmplt
165; CHECK-SOFTFP-FP16:       vcmpe.f32 s0, #0
166; CHECK-SOFTFP-FULLFP16:   vcmpe.f16 s0, #0
167; CHECK-HARDFP-FULLFP16:   vcmpe.f16 s0, #0
168}
169
; Ordered less-than of two coerced half arguments: expects the signalling
; register-register compare (vcmpe).
170define i32 @VCMPE2(float %F.coerce, float %G.coerce) {
171entry:
172  %0 = bitcast float %F.coerce to i32
173  %tmp.0.extract.trunc = trunc i32 %0 to i16
174  %1 = bitcast i16 %tmp.0.extract.trunc to half
175  %2 = bitcast float %G.coerce to i32
176  %tmp.1.extract.trunc = trunc i32 %2 to i16
177  %3 = bitcast i16 %tmp.1.extract.trunc to half
178  %tmp = fcmp olt half %1, %3
179  %tmp1 = zext i1 %tmp to i32
180  ret i32 %tmp1
181
182; CHECK-LABEL:  VCMPE2:
183
184; CHECK-SOFT:              bl  __aeabi_fcmplt
185; CHECK-SOFTFP-FP16:       vcmpe.f32 s{{.}}, s{{.}}
186; CHECK-SOFTFP-FULLFP16:   vcmpe.f16 s{{.}}, s{{.}}
187; CHECK-HARDFP-FULLFP16:   vcmpe.f16 s{{.}}, s{{.}}
188}
189
190; Test lowering of BR_CC
; fcmp (with fast-math flags) feeding a conditional branch: exercises BR_CC
; lowering. 0xH6800 is a half-precision immediate constant.
191define hidden i32 @VCMPBRCC() {
192entry:
193  %f = alloca half, align 2
194  br label %for.cond
195
196for.cond:
197  %0 = load half, half* %f, align 2
198  %cmp = fcmp nnan ninf nsz ole half %0, 0xH6800
199  br i1 %cmp, label %for.body, label %for.end
200
201for.body:
202  ret i32 1
203
204for.end:
205  ret i32 0
206
207; CHECK-LABEL:            VCMPBRCC:
208
209; CHECK-SOFT:             bl  __aeabi_fcmpgt
210; CHECK-SOFT:             cmp r0, #0
211
212; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
213; CHECK-SOFTFP-FP16:      vcmpe.f32 [[S2]], s0
214; CHECK-SOFTFP-FP16:      vmrs  APSR_nzcv, fpscr
215
216; CHECK-SOFTFP-FULLFP16:  vcmpe.f16 s{{.}}, s{{.}}
217; CHECK-SOFTFP-FULLFP16:  vmrs  APSR_nzcv, fpscr
218}
219
220; 5. VCVT (between floating-point and fixed-point)
221; Only assembly/disassembly support
222
223; 6. VCVT (between floating-point and integer, both directions)
; half -> signed i32 conversion; with FullFP16 this should be a single
; vcvt.s32.f16 between register moves.
224define i32 @fptosi(i32 %A.coerce) {
225entry:
226  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
227  %0 = bitcast i16 %tmp.0.extract.trunc to half
228  %conv = fptosi half %0 to i32
229  ret i32 %conv
230
231; CHECK-LABEL:                 fptosi:
232
233; CHECK-HARDFP-FULLFP16:       vmov.f16  s0, r0
234; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.s32.f16  s0, s0
235; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
236}
237
; half -> unsigned i32 conversion: expects vcvt.u32.f16 with FullFP16.
238define i32 @fptoui(i32 %A.coerce) {
239entry:
240  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
241  %0 = bitcast i16 %tmp.0.extract.trunc to half
242  %conv = fptoui half %0 to i32
243  ret i32 %conv
244
245; CHECK-HARDFP-FULLFP16:       vcvt.u32.f16  s0, s0
246; CHECK-HARDFP-FULLFP16-NEXT:  vmov  r0, s0
247}
248
; Unsigned i32 -> half conversion: expects vcvt.f16.u32 with FullFP16.
249define float @UintToH(i32 %a, i32 %b) {
250entry:
251  %0 = uitofp i32 %a to half
252  %1 = bitcast half %0 to i16
253  %tmp0.insert.ext = zext i16 %1 to i32
254  %2 = bitcast i32 %tmp0.insert.ext to float
255  ret float %2
256
257; CHECK-LABEL:                 UintToH:
258
259; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
260; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.u32  s0, s0
261}
262
; Signed i32 -> half conversion: expects vcvt.f16.s32 with FullFP16.
263define float @SintToH(i32 %a, i32 %b) {
264entry:
265  %0 = sitofp i32 %a to half
266  %1 = bitcast half %0 to i16
267  %tmp0.insert.ext = zext i16 %1 to i32
268  %2 = bitcast i32 %tmp0.insert.ext to float
269  ret float %2
270
271; CHECK-LABEL:                 SintToH:
272
273; CHECK-HARDFP-FULLFP16:       vmov  s0, r0
274; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.s32  s0, s0
275}
276
; float -> half truncation: expects the vcvtb.f16.f32 instruction.
277define i32 @f2h(float %f) {
278entry:
279  %conv = fptrunc float %f to half
280  %0 = bitcast half %conv to i16
281  %tmp.0.insert.ext = zext i16 %0 to i32
282  ret i32 %tmp.0.insert.ext
283
284; CHECK-LABEL:            f2h:
285; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f32 s0, s0
286}
287
; half -> float extension: expects the vcvtb.f32.f16 instruction.
288define float @h2f(i32 %h.coerce) {
289entry:
290  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
291  %0 = bitcast i16 %tmp.0.extract.trunc to half
292  %conv = fpext half %0 to float
293  ret float %conv
294
295; CHECK-LABEL:            h2f:
296; CHECK-HARDFP-FULLFP16:  vcvtb.f32.f16 s0, s0
297}
298
299
; half -> double extension: expects the vcvtb.f64.f16 instruction.
300define double @h2d(i32 %h.coerce) {
301entry:
302  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
303  %0 = bitcast i16 %tmp.0.extract.trunc to half
304  %conv = fpext half %0 to double
305  ret double %conv
306
307; CHECK-LABEL:            h2d:
308; CHECK-HARDFP-FULLFP16:  vcvtb.f64.f16 d{{.*}}, s{{.}}
309}
310
; double -> half truncation: expects the vcvtb.f16.f64 instruction.
311define i32 @d2h(double %d) {
312entry:
313  %conv = fptrunc double %d to half
314  %0 = bitcast half %conv to i16
315  %tmp.0.insert.ext = zext i16 %0 to i32
316  ret i32 %tmp.0.insert.ext
317
318; CHECK-LABEL:            d2h:
319; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f64 s0, d{{.*}}
320}
321
322; TODO:
323; 7.  VCVTA
324; 8.  VCVTM
325; 9.  VCVTN
326; 10. VCVTP
327; 11. VCVTR
328
329; 12. VDIV
; Half-precision divide via the same float-coercion pattern as @Add.
; (Note: the result value is named %add even though the op is fdiv.)
330define float @Div(float %a.coerce, float %b.coerce) {
331entry:
332  %0 = bitcast float %a.coerce to i32
333  %tmp.0.extract.trunc = trunc i32 %0 to i16
334  %1 = bitcast i16 %tmp.0.extract.trunc to half
335  %2 = bitcast float %b.coerce to i32
336  %tmp1.0.extract.trunc = trunc i32 %2 to i16
337  %3 = bitcast i16 %tmp1.0.extract.trunc to half
338  %add = fdiv half %1, %3
339  %4 = bitcast half %add to i16
340  %tmp4.0.insert.ext = zext i16 %4 to i32
341  %5 = bitcast i32 %tmp4.0.insert.ext to float
342  ret float %5
343
344; CHECK-LABEL:  Div:
345
346; CHECK-SOFT:  bl  __aeabi_h2f
347; CHECK-SOFT:  bl  __aeabi_h2f
348; CHECK-SOFT:  bl  __aeabi_fdiv
349; CHECK-SOFT:  bl  __aeabi_f2h
350
351; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
352; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
353; CHECK-SOFTFP-VFP3:  vdiv.f32
354; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
355
356; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
357; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
358; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
359; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
360; CHECK-SOFTFP-FP16:  vdiv.f32      [[S0]], [[S0]], [[S2]]
361; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
362; CHECK-SOFTFP-FP16:  vmov  r0, s0
363
364; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
365; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
366; CHECK-SOFTFP-FULLFP16:       vdiv.f16  [[S0]], [[S2]], [[S0]]
367; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
368
369; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
370; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
371; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
372; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
373; CHECK-HARDFP-VFP3:  vdiv.f32
374; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
375; CHECK-HARDFP-VFP3:  vmov  s0, r0
376
377; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
378; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
379; CHECK-HARDFP-FP16:  vdiv.f32  [[S0]], [[S0]], [[S2]]
380; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
381
382; CHECK-HARDFP-FULLFP16:       vdiv.f16  s0, s0, s1
383}
384
385; 13. VFMA
; fmul + fadd on halves: with -fp-contract=fast this should fuse into
; vfma.f16 (a*b + c).
386define float @VFMA(float %a.coerce, float %b.coerce, float %c.coerce) {
387entry:
388  %0 = bitcast float %a.coerce to i32
389  %tmp.0.extract.trunc = trunc i32 %0 to i16
390  %1 = bitcast i16 %tmp.0.extract.trunc to half
391  %2 = bitcast float %b.coerce to i32
392  %tmp1.0.extract.trunc = trunc i32 %2 to i16
393  %3 = bitcast i16 %tmp1.0.extract.trunc to half
394  %4 = bitcast float %c.coerce to i32
395  %tmp2.0.extract.trunc = trunc i32 %4 to i16
396  %5 = bitcast i16 %tmp2.0.extract.trunc to half
397  %mul = fmul half %1, %3
398  %add = fadd half %mul, %5
399  %6 = bitcast half %add to i16
400  %tmp4.0.insert.ext = zext i16 %6 to i32
401  %7 = bitcast i32 %tmp4.0.insert.ext to float
402  ret float %7
403
404; CHECK-LABEL:                      VFMA:
405; CHECK-HARDFP-FULLFP16-FAST:       vfma.f16  s2, s0, s1
406; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
407}
408
409; 14. VFMS
; c - (a*b) on halves: with -fp-contract=fast this should fuse into
; vfms.f16.
410define float @VFMS(float %a.coerce, float %b.coerce, float %c.coerce) {
411entry:
412  %0 = bitcast float %a.coerce to i32
413  %tmp.0.extract.trunc = trunc i32 %0 to i16
414  %1 = bitcast i16 %tmp.0.extract.trunc to half
415  %2 = bitcast float %b.coerce to i32
416  %tmp1.0.extract.trunc = trunc i32 %2 to i16
417  %3 = bitcast i16 %tmp1.0.extract.trunc to half
418  %4 = bitcast float %c.coerce to i32
419  %tmp2.0.extract.trunc = trunc i32 %4 to i16
420  %5 = bitcast i16 %tmp2.0.extract.trunc to half
421  %mul = fmul half %1, %3
422  %sub = fsub half %5, %mul
423  %6 = bitcast half %sub to i16
424  %tmp4.0.insert.ext = zext i16 %6 to i32
425  %7 = bitcast i32 %tmp4.0.insert.ext to float
426  ret float %7
427
428; CHECK-LABEL:                      VFMS:
429; CHECK-HARDFP-FULLFP16-FAST:       vfms.f16  s2, s0, s1
430; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
431}
432
433; 15. VFNMA
; -(a*b) - c on halves: with -fp-contract=fast this should fuse into
; vfnma.f16.
434define float @VFNMA(float %a.coerce, float %b.coerce, float %c.coerce) {
435entry:
436  %0 = bitcast float %a.coerce to i32
437  %tmp.0.extract.trunc = trunc i32 %0 to i16
438  %1 = bitcast i16 %tmp.0.extract.trunc to half
439  %2 = bitcast float %b.coerce to i32
440  %tmp1.0.extract.trunc = trunc i32 %2 to i16
441  %3 = bitcast i16 %tmp1.0.extract.trunc to half
442  %4 = bitcast float %c.coerce to i32
443  %tmp2.0.extract.trunc = trunc i32 %4 to i16
444  %5 = bitcast i16 %tmp2.0.extract.trunc to half
445  %mul = fmul half %1, %3
446  %sub = fsub half -0.0, %mul
447  %sub2 = fsub half %sub, %5
448  %6 = bitcast half %sub2 to i16
449  %tmp4.0.insert.ext = zext i16 %6 to i32
450  %7 = bitcast i32 %tmp4.0.insert.ext to float
451  ret float %7
452
453; CHECK-LABEL:                      VFNMA:
454; CHECK-HARDFP-FULLFP16-FAST:       vfnma.f16  s2, s0, s1
455; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
456}
457
458; 16. VFNMS
; (a*b) - c on halves: with -fp-contract=fast this should fuse into
; vfnms.f16.
459define float @VFNMS(float %a.coerce, float %b.coerce, float %c.coerce) {
460entry:
461  %0 = bitcast float %a.coerce to i32
462  %tmp.0.extract.trunc = trunc i32 %0 to i16
463  %1 = bitcast i16 %tmp.0.extract.trunc to half
464  %2 = bitcast float %b.coerce to i32
465  %tmp1.0.extract.trunc = trunc i32 %2 to i16
466  %3 = bitcast i16 %tmp1.0.extract.trunc to half
467  %4 = bitcast float %c.coerce to i32
468  %tmp2.0.extract.trunc = trunc i32 %4 to i16
469  %5 = bitcast i16 %tmp2.0.extract.trunc to half
470  %mul = fmul half %1, %3
471  %sub2 = fsub half %mul, %5
472  %6 = bitcast half %sub2 to i16
473  %tmp4.0.insert.ext = zext i16 %6 to i32
474  %7 = bitcast i32 %tmp4.0.insert.ext to float
475  ret float %7
476
477; CHECK-LABEL:                      VFNMS:
478; CHECK-HARDFP-FULLFP16-FAST:       vfnms.f16  s2, s0, s1
479; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
480}
481
482; 17. VMAXNM
483; 18. VMINNM
484; Tested in fp16-vminmaxnm.ll and fp16-vminmaxnm-safe.ll
485
486; 19. VMLA
; c + (a*b) on halves: should select the multiply-accumulate vmla.f16.
487define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
488entry:
489  %0 = bitcast float %a.coerce to i32
490  %tmp.0.extract.trunc = trunc i32 %0 to i16
491  %1 = bitcast i16 %tmp.0.extract.trunc to half
492  %2 = bitcast float %b.coerce to i32
493  %tmp1.0.extract.trunc = trunc i32 %2 to i16
494  %3 = bitcast i16 %tmp1.0.extract.trunc to half
495  %4 = bitcast float %c.coerce to i32
496  %tmp2.0.extract.trunc = trunc i32 %4 to i16
497  %5 = bitcast i16 %tmp2.0.extract.trunc to half
498  %mul = fmul half %1, %3
499  %add = fadd half %5, %mul
500  %6 = bitcast half %add to i16
501  %tmp4.0.insert.ext = zext i16 %6 to i32
502  %7 = bitcast i32 %tmp4.0.insert.ext to float
503  ret float %7
504
505; CHECK-LABEL:                 VMLA:
506; CHECK-HARDFP-FULLFP16:       vmla.f16  s2, s0, s1
507; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
508}
509
510; 20. VMLS
; c - (a*b) on halves: should select the multiply-subtract vmls.f16.
511define float @VMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
512entry:
513  %0 = bitcast float %a.coerce to i32
514  %tmp.0.extract.trunc = trunc i32 %0 to i16
515  %1 = bitcast i16 %tmp.0.extract.trunc to half
516  %2 = bitcast float %b.coerce to i32
517  %tmp1.0.extract.trunc = trunc i32 %2 to i16
518  %3 = bitcast i16 %tmp1.0.extract.trunc to half
519  %4 = bitcast float %c.coerce to i32
520  %tmp2.0.extract.trunc = trunc i32 %4 to i16
521  %5 = bitcast i16 %tmp2.0.extract.trunc to half
522  %mul = fmul half %1, %3
523  %add = fsub half %5, %mul
524  %6 = bitcast half %add to i16
525  %tmp4.0.insert.ext = zext i16 %6 to i32
526  %7 = bitcast i32 %tmp4.0.insert.ext to float
527  ret float %7
528
529; CHECK-LABEL:                 VMLS:
530; CHECK-HARDFP-FULLFP16:       vmls.f16  s2, s0, s1
531; CHECK-HARDFP-FULLFP16-NEXT:  vmov.f32  s0, s2
532}
533
534; TODO: fix immediates.
535; 21. VMOV (between general-purpose register and half-precision register)
536
537; 22. VMOV (immediate)
; fadd with the half immediate 0xHC000 (-2.0): the constant should be
; materialized with VMOV (immediate), i.e. vmov.f16 ..., #-2.0.
538define i32 @movi(i32 %a.coerce) {
539entry:
540  %tmp.0.extract.trunc = trunc i32 %a.coerce to i16
541  %0 = bitcast i16 %tmp.0.extract.trunc to half
542  %add = fadd half %0, 0xHC000
543  %1 = bitcast half %add to i16
544  %tmp2.0.insert.ext = zext i16 %1 to i32
545  ret i32 %tmp2.0.insert.ext
546
547; CHECK-LABEL:            movi:
548; CHECK-HARDFP-FULLFP16:  vmov.f16  s0, #-2.000000e+00
549}
550
551; 23. VMUL
; Half-precision multiply via the same float-coercion pattern as @Add.
; (Note: the result value is named %add even though the op is fmul.)
552define float @Mul(float %a.coerce, float %b.coerce) {
553entry:
554  %0 = bitcast float %a.coerce to i32
555  %tmp.0.extract.trunc = trunc i32 %0 to i16
556  %1 = bitcast i16 %tmp.0.extract.trunc to half
557  %2 = bitcast float %b.coerce to i32
558  %tmp1.0.extract.trunc = trunc i32 %2 to i16
559  %3 = bitcast i16 %tmp1.0.extract.trunc to half
560  %add = fmul half %1, %3
561  %4 = bitcast half %add to i16
562  %tmp4.0.insert.ext = zext i16 %4 to i32
563  %5 = bitcast i32 %tmp4.0.insert.ext to float
564  ret float %5
565
566; CHECK-LABEL:  Mul:
567
568; CHECK-SOFT:  bl  __aeabi_h2f
569; CHECK-SOFT:  bl  __aeabi_h2f
570; CHECK-SOFT:  bl  __aeabi_fmul
571; CHECK-SOFT:  bl  __aeabi_f2h
572
573; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
574; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
575; CHECK-SOFTFP-VFP3:  vmul.f32
576; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
577
578; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
579; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
580; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
581; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
582; CHECK-SOFTFP-FP16:  vmul.f32      [[S0]], [[S0]], [[S2]]
583; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
584; CHECK-SOFTFP-FP16:  vmov  r0, s0
585
586; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
587; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
588; CHECK-SOFTFP-FULLFP16:       vmul.f16  [[S0]], [[S2]], [[S0]]
589; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0
590
591; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
592; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
593; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
594; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
595; CHECK-HARDFP-VFP3:  vmul.f32
596; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
597; CHECK-HARDFP-VFP3:  vmov  s0, r0
598
599; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
600; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
601; CHECK-HARDFP-FP16:  vmul.f32  [[S0]], [[S0]], [[S2]]
602; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
603
604; CHECK-HARDFP-FULLFP16:       vmul.f16  s0, s0, s1
605}
606
607; 24. VNEG
; fsub from -0.0 is the canonical fneg pattern: should select vneg.f16.
608define float @Neg(float %a.coerce) {
609entry:
610  %0 = bitcast float %a.coerce to i32
611  %tmp.0.extract.trunc = trunc i32 %0 to i16
612  %1 = bitcast i16 %tmp.0.extract.trunc to half
613  %2 = fsub half -0.000000e+00, %1
614  %3 = bitcast half %2 to i16
615  %tmp4.0.insert.ext = zext i16 %3 to i32
616  %4 = bitcast i32 %tmp4.0.insert.ext to float
617  ret float %4
618
619; CHECK-LABEL:                 Neg:
620; CHECK-HARDFP-FULLFP16:       vneg.f16  s0, s0
621}
622
623; 25. VNMLA
; -(a*b) - c on halves: should select vnmla.f16.
624define float @VNMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
625entry:
626  %0 = bitcast float %a.coerce to i32
627  %tmp.0.extract.trunc = trunc i32 %0 to i16
628  %1 = bitcast i16 %tmp.0.extract.trunc to half
629  %2 = bitcast float %b.coerce to i32
630  %tmp1.0.extract.trunc = trunc i32 %2 to i16
631  %3 = bitcast i16 %tmp1.0.extract.trunc to half
632  %4 = bitcast float %c.coerce to i32
633  %tmp2.0.extract.trunc = trunc i32 %4 to i16
634  %5 = bitcast i16 %tmp2.0.extract.trunc to half
635  %add = fmul half %1, %3
636  %add2 = fsub half -0.000000e+00, %add
637  %add3 = fsub half %add2, %5
638  %6 = bitcast half %add3 to i16
639  %tmp4.0.insert.ext = zext i16 %6 to i32
640  %7 = bitcast i32 %tmp4.0.insert.ext to float
641  ret float %7
642
643; CHECK-LABEL:            VNMLA:
644; CHECK-HARDFP-FULLFP16:  vnmla.f16 s2, s0, s1
645; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
646}
647
648; 26. VNMLS
; (a*b) - c on halves: should select vnmls.f16.
649define float @VNMLS(float %a.coerce, float %b.coerce, float %c.coerce) {
650entry:
651  %0 = bitcast float %a.coerce to i32
652  %tmp.0.extract.trunc = trunc i32 %0 to i16
653  %1 = bitcast i16 %tmp.0.extract.trunc to half
654  %2 = bitcast float %b.coerce to i32
655  %tmp1.0.extract.trunc = trunc i32 %2 to i16
656  %3 = bitcast i16 %tmp1.0.extract.trunc to half
657  %4 = bitcast float %c.coerce to i32
658  %tmp2.0.extract.trunc = trunc i32 %4 to i16
659  %5 = bitcast i16 %tmp2.0.extract.trunc to half
660  %add = fmul half %1, %3
661  %add2 = fsub half %add, %5
662  %6 = bitcast half %add2 to i16
663  %tmp4.0.insert.ext = zext i16 %6 to i32
664  %7 = bitcast i32 %tmp4.0.insert.ext to float
665  ret float %7
666
667; CHECK-LABEL:            VNMLS:
668; CHECK-HARDFP-FULLFP16:  vnmls.f16 s2, s0, s1
669; CHECK-HARDFP-FULLFP16:  vmov.f32  s0, s2
670}
671
672; 27. VNMUL
; -(a*b) on halves: should select the negated multiply vnmul.f16.
673define float @NMul(float %a.coerce, float %b.coerce) {
674entry:
675  %0 = bitcast float %a.coerce to i32
676  %tmp.0.extract.trunc = trunc i32 %0 to i16
677  %1 = bitcast i16 %tmp.0.extract.trunc to half
678  %2 = bitcast float %b.coerce to i32
679  %tmp1.0.extract.trunc = trunc i32 %2 to i16
680  %3 = bitcast i16 %tmp1.0.extract.trunc to half
681  %add = fmul half %1, %3
682  %add2 = fsub half -0.0, %add
683  %4 = bitcast half %add2 to i16
684  %tmp4.0.insert.ext = zext i16 %4 to i32
685  %5 = bitcast i32 %tmp4.0.insert.ext to float
686  ret float %5
687
688; CHECK-LABEL:                 NMul:
689; CHECK-HARDFP-FULLFP16:       vnmul.f16  s0, s0, s1
690}
691
692; 35. VSELEQ
; select on oeq (nsz): should lower to vseleq.f16 with FullFP16, and to a
; predicated vmoveq.f32 (with an IT block on Thumb2) for +fp16 softfp.
693define half @select_cc1(half* %a0)  {
694  %1 = load half, half* %a0
695  %2 = fcmp nsz oeq half %1, 0xH0001
696  %3 = select i1 %2, half 0xHC000, half 0xH0002
697  ret half %3
698
699; CHECK-LABEL:                 select_cc1:
700
701; CHECK-HARDFP-FULLFP16:       vcmp.f16 s6, s0
702; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
703; CHECK-HARDFP-FULLFP16:       vseleq.f16  s0, s{{.}}, s{{.}}
704
705; CHECK-SOFTFP-FP16-A32:       vcmp.f32 s6, s0
706; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
707; CHECK-SOFTFP-FP16-A32-NEXT:  vmoveq.f32 s{{.}}, s{{.}}
708
709; CHECK-SOFTFP-FP16-T32:       vcmp.f32 s6, s0
710; CHECK-SOFTFP-FP16-T32:       vmrs APSR_nzcv, fpscr
711; CHECK-SOFTFP-FP16-T32:       it eq
712; CHECK-SOFTFP-FP16-T32:       vmoveq.f32 s{{.}}, s{{.}}
713}
714
715; FIXME: more tests need to be added for VSELGE and VSELGT.
716; That is, more combinations of immediate operands that can or can't
717; be encoded as an FP16 immediate need to be added here.
718;
719; 36. VSELGE
; select on oge (nsz): expects vcmpe + vselge.f16 with FullFP16.
720define half @select_cc_ge1(half* %a0)  {
721  %1 = load half, half* %a0
722  %2 = fcmp nsz oge half %1, 0xH0001
723  %3 = select i1 %2, half 0xHC000, half 0xH0002
724  ret half %3
725
726; CHECK-LABEL:                 select_cc_ge1:
727
728; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s6, s0
729; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
730; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
731
732; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
733; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
734; CHECK-SOFTFP-FP16-A32-NEXT:  vmovge.f32 s{{.}}, s{{.}}
735
736; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
737; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
738; CHECK-SOFTFP-FP16-T32-NEXT:  it ge
739; CHECK-SOFTFP-FP16-T32-NEXT:  vmovge.f32 s{{.}}, s{{.}}
740}
741
; select on ole (nsz): expects the swapped-operand vcmpe + vselge.f16.
742define half @select_cc_ge2(half* %a0)  {
743  %1 = load half, half* %a0
744  %2 = fcmp nsz ole half %1, 0xH0001
745  %3 = select i1 %2, half 0xHC000, half 0xH0002
746  ret half %3
747
748; CHECK-LABEL:                 select_cc_ge2:
749
750; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s6
751; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
752; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
753
754; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
755; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
756; CHECK-SOFTFP-FP16-A32-NEXT:  vmovls.f32 s{{.}}, s{{.}}
757
758; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
759; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
760; CHECK-SOFTFP-FP16-T32-NEXT:  it ls
761; CHECK-SOFTFP-FP16-T32-NEXT:  vmovls.f32 s{{.}}, s{{.}}
762}
763
; select on ugt (nsz): also maps onto vselge.f16 via operand swap.
764define half @select_cc_ge3(half* %a0)  {
765  %1 = load half, half* %a0
766  %2 = fcmp nsz ugt half %1, 0xH0001
767  %3 = select i1 %2, half 0xHC000, half 0xH0002
768  ret half %3
769
770; CHECK-LABEL:                 select_cc_ge3:
771
772; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s6
773; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
774; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
775
776; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
777; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
778; CHECK-SOFTFP-FP16-A32-NEXT:  vmovhi.f32 s{{.}}, s{{.}}
779
780; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
781; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
782; CHECK-SOFTFP-FP16-T32-NEXT:  it hi
783; CHECK-SOFTFP-FP16-T32-NEXT:  vmovhi.f32 s{{.}}, s{{.}}
784}
785
; select on ult (nsz): also maps onto vselge.f16.
786define half @select_cc_ge4(half* %a0)  {
787  %1 = load half, half* %a0
788  %2 = fcmp nsz ult half %1, 0xH0001
789  %3 = select i1 %2, half 0xHC000, half 0xH0002
790  ret half %3
791
792; CHECK-LABEL:                 select_cc_ge4:
793
794; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s6, s0
795; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
796; CHECK-HARDFP-FULLFP16-NEXT:  vselge.f16 s0, s{{.}}, s{{.}}
797
798; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
799; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
800; CHECK-SOFTFP-FP16-A32-NEXT:  vmovlt.f32 s{{.}}, s{{.}}
801
802; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
803; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
804; CHECK-SOFTFP-FP16-T32-NEXT:  it lt
805; CHECK-SOFTFP-FP16-T32-NEXT:  vmovlt.f32 s{{.}}, s{{.}}
806}
807
808; 37. VSELGT
; select on ogt (nsz): expects vcmpe + vselgt.f16 with FullFP16.
809define half @select_cc_gt1(half* %a0)  {
810  %1 = load half, half* %a0
811  %2 = fcmp nsz ogt half %1, 0xH0001
812  %3 = select i1 %2, half 0xHC000, half 0xH0002
813  ret half %3
814
815; CHECK-LABEL:                 select_cc_gt1:
816
817; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s6, s0
818; CHECK-HARDFP-FULLFP16-NEXT:  vmrs APSR_nzcv, fpscr
819; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
820
821; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
822; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
823; CHECK-SOFTFP-FP16-A32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}
824
825; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
826; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
827; CHECK-SOFTFP-FP16-T32-NEXT:  it gt
828; CHECK-SOFTFP-FP16-T32-NEXT:  vmovgt.f32 s{{.}}, s{{.}}
829}
830
; select on uge (nsz): maps onto vselgt.f16 via operand swap.
831define half @select_cc_gt2(half* %a0)  {
832  %1 = load half, half* %a0
833  %2 = fcmp nsz uge half %1, 0xH0001
834  %3 = select i1 %2, half 0xHC000, half 0xH0002
835  ret half %3
836
837; CHECK-LABEL:                 select_cc_gt2:
838
839; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s6
840; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
841; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
842
843; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
844; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
845; CHECK-SOFTFP-FP16-A32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}
846
847; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
848; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
849; CHECK-SOFTFP-FP16-T32-NEXT:  it pl
850; CHECK-SOFTFP-FP16-T32-NEXT:  vmovpl.f32 s{{.}}, s{{.}}
851}
852
; select on ule (nsz): maps onto vselgt.f16.
853define half @select_cc_gt3(half* %a0)  {
854  %1 = load half, half* %a0
855  %2 = fcmp nsz ule half %1, 0xH0001
856  %3 = select i1 %2, half 0xHC000, half 0xH0002
857  ret half %3
858
859; CHECK-LABEL:                 select_cc_gt3:
860
861; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s6, s0
862; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
863; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
864
865; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
866; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
867; CHECK-SOFTFP-FP16-A32-NEXT:  vmovle.f32 s{{.}}, s{{.}}
868
869; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
870; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
871; CHECK-SOFTFP-FP16-T32-NEXT:  it le
872; CHECK-SOFTFP-FP16-T32-NEXT:  vmovle.f32 s{{.}}, s{{.}}
873}
874
; select on olt (nsz): maps onto vselgt.f16 via operand swap.
875define half @select_cc_gt4(half* %a0)  {
876  %1 = load half, half* %a0
877  %2 = fcmp nsz olt half %1, 0xH0001
878  %3 = select i1 %2, half 0xHC000, half 0xH0002
879  ret half %3
880
881; CHECK-LABEL:                 select_cc_gt4:
882
883; CHECK-HARDFP-FULLFP16:       vcmpe.f16 s0, s6
884; CHECK-HARDFP-FULLFP16-NEXT:  vmrs  APSR_nzcv, fpscr
885; CHECK-HARDFP-FULLFP16-NEXT:  vselgt.f16  s0, s{{.}}, s{{.}}
886
887; CHECK-SOFTFP-FP16-A32:       vcmpe.f32 s6, s0
888; CHECK-SOFTFP-FP16-A32-NEXT:  vmrs APSR_nzcv, fpscr
889; CHECK-SOFTFP-FP16-A32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}
890
891; CHECK-SOFTFP-FP16-T32:       vcmpe.f32 s6, s0
892; CHECK-SOFTFP-FP16-T32-NEXT:  vmrs APSR_nzcv, fpscr
893; CHECK-SOFTFP-FP16-T32-NEXT:  it mi
894; CHECK-SOFTFP-FP16-T32-NEXT:  vmovmi.f32 s{{.}}, s{{.}}
895}
896
897; 38. VSELVS
; Check lowering of (fcmp ueq + select). "ueq" is true on equal OR unordered,
; so with +fullfp16 the select lowers to a vseleq followed by a vselvs (VS =
; unordered result, since a NaN operand sets the V flag after vmrs); the
; vfp4 (+fp16) fallback promotes to f32 and uses two predicated vmovs (EQ
; then VS). The float<->i16 bitcast dance models the soft-float half ABI,
; where a half is passed/returned in the low 16 bits of a float/GPR.
define float @select_cc4(float %a.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half

  ; nsz: the sign of a zero operand may be ignored when forming the compare.
  ; 0xHC000 is -2.0 (see the #-2.000000e+00 materializations checked below).
  %2 = fcmp nsz ueq half %1, 0xH0001
  %3 = select i1 %2, half 0xHC000, half 0xH0002

  ; Repack the half result into the low 16 bits of the returned float.
  %4 = bitcast half %3 to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:                 select_cc4:

; CHECK-HARDFP-FULLFP16:       vldr.16	[[S2:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vldr.16	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-HARDFP-FULLFP16:       vmov.f16 [[S6:s[0-9]]], #-2.000000e+00
; CHECK-HARDFP-FULLFP16:       vcmp.f16	s0, [[S2]]
; CHECK-HARDFP-FULLFP16-NEXT:  vmrs	APSR_nzcv, fpscr
; CHECK-HARDFP-FULLFP16-NEXT:  vseleq.f16	[[S0:s[0-9]]], [[S6]], [[S4]]
; CHECK-HARDFP-FULLFP16-NEXT:  vselvs.f16	s0, [[S6]], [[S0]]

; CHECK-SOFTFP-FP16-A32:       vmov	[[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-A32:       vldr	s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-A32:       vcvtb.f32.f16	[[S6]], [[S6]]
; CHECK-SOFTFP-FP16-A32:       vmov.f32	[[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-A32:       vcmp.f32	[[S6]], s0
; CHECK-SOFTFP-FP16-A32:       vldr	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-A32:       vmrs	APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-A32:       vmoveq.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vmovvs.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-A32-NEXT:  vcvtb.f16.f32 s0, [[S4]]

; CHECK-SOFTFP-FP16-T32:       vmov	[[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32:       vldr	s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcvtb.f32.f16	[[S6]], [[S6]]
; CHECK-SOFTFP-FP16-T32:       vldr	[[S4:s[0-9]]], .LCPI{{.*}}
; CHECK-SOFTFP-FP16-T32:       vcmp.f32	[[S6]], s0
; CHECK-SOFTFP-FP16-T32:       vmov.f32	[[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32:       vmrs	APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32:       it eq
; CHECK-SOFTFP-FP16-T32:       vmoveq.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32:       it vs
; CHECK-SOFTFP-FP16-T32-NEXT:  vmovvs.f32	[[S4]], [[S2]]
; CHECK-SOFTFP-FP16-T32-NEXT:  vcvtb.f16.f32 s0, [[S4]]
}
946
947; 40. VSUB
; Check lowering of half-precision fsub across all configurations:
;  - soft float:        everything via libcalls (__aeabi_h2f/fsub/f2h);
;  - softfp +vfp3:      h2f/f2h conversion libcalls around a vsub.f32;
;  - softfp/hard +fp16: hardware vcvtb f16<->f32 conversions around vsub.f32;
;  - +fullfp16:         a single native vsub.f16.
; The float<->i16 bitcast dance models the soft-float half ABI, where a half
; is passed/returned in the low 16 bits of a float/GPR.
define float @Sub(float %a.coerce, float %b.coerce) {
entry:
  ; Unpack the two half operands from the low 16 bits of the float args.
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %2 = bitcast float %b.coerce to i32
  %tmp1.0.extract.trunc = trunc i32 %2 to i16
  %3 = bitcast i16 %tmp1.0.extract.trunc to half
  ; NOTE: named %add but performs the subtraction under test.
  %add = fsub half %1, %3
  ; Repack the half result into the low 16 bits of the returned float.
  %4 = bitcast half %add to i16
  %tmp4.0.insert.ext = zext i16 %4 to i32
  %5 = bitcast i32 %tmp4.0.insert.ext to float
  ret float %5

; CHECK-LABEL:  Sub:

; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_h2f
; CHECK-SOFT:  bl  __aeabi_fsub
; CHECK-SOFT:  bl  __aeabi_f2h

; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
; CHECK-SOFTFP-VFP3:  vsub.f32
; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h

; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vsub.f32      [[S0]], [[S0]], [[S2]]
; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
; CHECK-SOFTFP-FP16:  vmov  r0, s0

; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S0:s[0-9]]], r1
; CHECK-SOFTFP-FULLFP16:       vmov.f16  [[S2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:       vsub.f16  [[S0]], [[S2]], [[S0]]
; CHECK-SOFTFP-FULLFP16-NEXT:  vmov.f16  r0, s0

; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
; CHECK-HARDFP-VFP3:  vsub.f32
; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
; CHECK-HARDFP-VFP3:  vmov  s0, r0

; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
; CHECK-HARDFP-FP16:  vsub.f32  [[S0]], [[S0]], [[S2]]
; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]

; CHECK-HARDFP-FULLFP16:       vsub.f16  s0, s0, s1
}
1002
1003; Check for VSTRH with a FCONSTH, this checks that addressing mode
1004; AddrMode5FP16 is supported.
; Store/load a half through a stack slot so vstr.16/vldr.16 must use an
; [sp, #imm] address (the AddrMode5FP16 addressing mode under test).
define i32 @ThumbAddrMode5FP16(i32 %A.coerce) {
entry:
  %S = alloca half, align 2
  ; Unpack the half argument from the low 16 bits of the i32 (soft-float ABI).
  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
  ; Volatile store/load of 0xH3C00 (half 1.0) forces the value through the
  ; stack slot instead of being folded away.
  store volatile half 0xH3C00, half* %S, align 2
  %S.0.S.0. = load volatile half, half* %S, align 2
  %add = fadd half %S.0.S.0., %0
  ; Return the half result zero-extended into an i32.
  %1 = bitcast half %add to i16
  %tmp2.0.insert.ext = zext i16 %1 to i32
  ret i32 %tmp2.0.insert.ext

; CHECK-LABEL:            ThumbAddrMode5FP16

; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0:s[0-9]]], #1.000000e+00
; CHECK-SOFTFP-FULLFP16:  vstr.16     [[S0]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vmov.f16    [[S0_2:s[0-9]]], r0
; CHECK-SOFTFP-FULLFP16:  vldr.16     [[S2:s[0-9]]], [sp, #{{.}}]
; CHECK-SOFTFP-FULLFP16:  vadd.f16    s{{.}}, [[S2]], [[S0_2]]
}
1026
1027; Test function calls to check store/load reg to/from stack
; At -O0 (fast-isel), a half value that is live across the call to @fn2 must
; be spilled and reloaded with the 2-byte vstr.16/vldr.16 forms; the CHECK
; lines pin the "2-byte Spill"/"2-byte Reload" stack accesses around bl fn2.
define i32 @fn1() {
entry:
  %coerce = alloca half, align 2
  %tmp2 = alloca i32, align 4
  ; 0xH7C00 is +infinity in half; stored twice so the materialized constant
  ; stays live across the first call.
  store half 0xH7C00, half* %coerce, align 2
  %0 = load i32, i32* %tmp2, align 4
  %call = call i32 bitcast (i32 (...)* @fn2 to i32 (i32)*)(i32 %0)
  store half 0xH7C00, half* %coerce, align 2
  %1 = load i32, i32* %tmp2, align 4
  %call3 = call i32 bitcast (i32 (...)* @fn3 to i32 (i32)*)(i32 %1)
  ret i32 %call3

; CHECK-SPILL-RELOAD-LABEL: fn1:
; CHECK-SPILL-RELOAD:       vstr.16 s0, [sp, #{{.}}]  @ 2-byte Spill
; CHECK-SPILL-RELOAD:  bl  fn2
; CHECK-SPILL-RELOAD-NEXT:  vldr.16 s0, [sp, #{{.}}]  @ 2-byte Reload
}
1045
1046declare dso_local i32 @fn2(...)
1047declare dso_local i32 @fn3(...)
1048