xref: /llvm-project/llvm/test/CodeGen/ARM/vdiv_combine.ll (revision a284bdb31146160352da905a888da738f2661b50)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
3
; Test signed conversion.
; sitofp <2 x i32> followed by fdiv by a splat of 8.0 (2^3) is expected to
; become a single fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
  ret <2 x float> %div.i
}
15
; Test unsigned conversion.
; Same shape as the signed case, but with uitofp: the expected instruction is
; the unsigned fixed-point form, vcvt.f32.u32 with #3.
define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.u32 d0, d0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
  ret <2 x float> %div.i
}
27
; Test which should not fold because the divisor is not a power of 2.
; The divisor is a splat of 6.80000019, so the expected output keeps a plain
; vcvt.f32.s32 (no immediate) and divides each lane with vdiv.f32 against a
; constant-pool load.
define arm_aapcs_vfpcc <2 x float> @t3(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d2, d0
; CHECK-NEXT:    vldr s2, LCPI2_0
; CHECK-NEXT:    vdiv.f32 s1, s5, s2
; CHECK-NEXT:    vdiv.f32 s0, s4, s2
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:    .data_region
; CHECK-NEXT:  LCPI2_0:
; CHECK-NEXT:    .long 0x40d9999a @ float 6.80000019
; CHECK-NEXT:    .end_data_region
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000>
  ret <2 x float> %div.i
}
48
; Test which should not fold because the power of 2 is out of range.
; The divisor 0x4200000000000000 is 2^33, one step past the 2^32 case that
; still folds. Expected output: a plain vcvt.f32.s32 followed by a vmul.f32 by
; the reciprocal (0x2f000000 is 2^-33 as a single-precision bit pattern).
define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d16, d0
; CHECK-NEXT:    vmov.i32 d17, #0x2f000000
; CHECK-NEXT:    vmul.f32 d0, d16, d17
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
  ret <2 x float> %div.i
}
62
; Test case where the constant is the largest power of 2 that folds (2^32).
; 0x41F0000000000000 is 2^32; the expected output is a single fixed-point
; vcvt.f32.s32 with #32.
define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #32
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
  ret <2 x float> %div.i
}
74
; Test quadword.
; <4 x i32> input divided by a splat of 8.0: the expected fold uses the q
; register form, vcvt.f32.s32 q0, q0, #3.
define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind {
; CHECK-LABEL: t6:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
  %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  ret <4 x float> %div.i
}
86
; Unsigned 16-bit lanes divided by a splat of 2.0.
; Expected output: the input is widened with vmovl.u16 and the divide folds
; into vcvt.f32.u32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_unsigned_i16_to_float:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovl.u16 q8, d0
; CHECK-NEXT:    vcvt.f32.u32 q0, q8, #1
; CHECK-NEXT:    bx lr
  %conv = uitofp <4 x i16> %in to <4 x float>
  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %shift
}
97
; Signed 16-bit lanes divided by a splat of 2.0.
; Expected output: the input is sign-extended with vmovl.s16 and the divide
; folds into vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_signed_i16_to_float:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovl.s16 q8, d0
; CHECK-NEXT:    vcvt.f32.s32 q0, q8, #1
; CHECK-NEXT:    bx lr
  %conv = sitofp <4 x i16> %in to <4 x float>
  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %shift
}
108
; Unsigned 64-bit lanes converted to float and divided by a splat of 2.0.
; No fixed-point vcvt is expected here: each lane is converted through a call
; to ___floatundisf, and the divide shows up as a vmul.f32 by 0.5
; (0x3f000000).
define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) {
; CHECK-LABEL: fix_i64_to_float:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    push {lr}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vorr q4, q0, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ___floatundisf
; CHECK-NEXT:    vmov r2, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov.i32 d8, #0x3f000000
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl ___floatundisf
; CHECK-NEXT:    vmov s18, r0
; CHECK-NEXT:    vmul.f32 d0, d9, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {lr}
; CHECK-NEXT:    bx lr
  %conv = uitofp <2 x i64> %in to <2 x float>
  %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
  ret <2 x float> %shift
}
131
; Unsigned 64-bit lanes converted to double and divided by a splat of 2.0.
; No fixed-point vcvt is expected here: each lane is converted through a call
; to ___floatundidf, and the divide shows up as one vmul.f64 by 0.5 per lane.
define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
; CHECK-LABEL: fix_i64_to_double:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    push {lr}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vorr q4, q0, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ___floatundidf
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    vmov.f64 d8, #5.000000e-01
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl ___floatundidf
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmul.f64 d1, d9, d8
; CHECK-NEXT:    vmul.f64 d0, d16, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {lr}
; CHECK-NEXT:    bx lr
  %conv = uitofp <2 x i64> %in to <2 x double>
  %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
  ret <2 x double> %shift
}
156
; 8 lanes.  Make sure things don't crash.
; The expected output below does combine: the <8 x i32> input occupies q0 and
; q1, and each half gets its own fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind {
; CHECK-LABEL: test7:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT:    vcvt.f32.s32 q1, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <8 x i32> %in to <8 x float>
  %div.i = fdiv <8 x float> %vcvt.i, <float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0>
  ret <8 x float> %div.i
}
169
; Can combine splat with an undef.
; Three lanes of the divisor are 2.0 and the last is undef; the expected
; output is still a single vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
; CHECK-LABEL: test8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
  %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
  ret <4 x float> %div.i
}
180
; 3-lane vector divided by a splat of 4.0 (2^2).
; Expected output: the fold still happens, using the quadword form
; vcvt.f32.s32 q0, q0, #2.
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) {
; CHECK-LABEL: test_illegal_int_to_fp:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #2
; CHECK-NEXT:    bx lr
  %conv = sitofp <3 x i32> %in to <3 x float>
  %res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0>
  ret <3 x float> %res
}
190
191
; Multiply form of the signed conversion test.
; sitofp followed by fmul by a splat of 0.125 (2^-3) is expected to become a
; single fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <2 x float> @t1_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t1_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01>
  ret <2 x float> %div.i
}
202
; Multiply form of the unsigned conversion test.
; uitofp followed by fmul by a splat of 0.125 (2^-3) is expected to become a
; single fixed-point vcvt.f32.u32 with #3.
define arm_aapcs_vfpcc <2 x float> @t2_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t2_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.u32 d0, d0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01>
  ret <2 x float> %div.i
}
213
; Multiply form of the out-of-range power-of-2 test.
; The multiplier 0x3DE0000000000000 is 2^-33, one step past the 2^-32 case
; that still folds. Expected output: a plain vcvt.f32.s32 followed by a
; vmul.f32 by that constant (0x2f000000 as a single-precision bit pattern).
define arm_aapcs_vfpcc <2 x float> @t4_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t4_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d16, d0
; CHECK-NEXT:    vmov.i32 d17, #0x2f000000
; CHECK-NEXT:    vmul.f32 d0, d16, d17
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 0x3DE0000000000000, float 0x3DE0000000000000>
  ret <2 x float> %div.i
}
226
; Multiply form of the largest-power-of-2 test.
; The multiplier 0x3DF0000000000000 is 2^-32; the expected output is a single
; fixed-point vcvt.f32.s32 with #32.
define arm_aapcs_vfpcc <2 x float> @t5_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t5_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 d0, d0, #32
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 0x3DF0000000000000, float 0x3DF0000000000000>
  ret <2 x float> %div.i
}
237
; Multiply form of the quadword test.
; <4 x i32> input multiplied by a splat of 0.125 (2^-3): the expected fold
; uses the q register form, vcvt.f32.s32 q0, q0, #3.
define arm_aapcs_vfpcc <4 x float> @t6_mul(<4 x i32> %vecinit6.i) local_unnamed_addr #0 {
; CHECK-LABEL: t6_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
  %div.i = fmul <4 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01>
  ret <4 x float> %div.i
}
248
; Unsigned 16-bit lanes multiplied by a splat of 0.5.
; Expected output: the input is widened with vmovl.u16 and the multiply folds
; into vcvt.f32.u32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float_mul(<4 x i16> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_unsigned_i16_to_float_mul:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovl.u16 q8, d0
; CHECK-NEXT:    vcvt.f32.u32 q0, q8, #1
; CHECK-NEXT:    bx lr
  %conv = uitofp <4 x i16> %in to <4 x float>
  %shift = fmul <4 x float> %conv, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
  ret <4 x float> %shift
}
259
; Signed 16-bit lanes multiplied by a splat of 0.5.
; Expected output: the input is sign-extended with vmovl.s16 and the multiply
; folds into vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float_mul(<4 x i16> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_signed_i16_to_float_mul:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovl.s16 q8, d0
; CHECK-NEXT:    vcvt.f32.s32 q0, q8, #1
; CHECK-NEXT:    bx lr
  %conv = sitofp <4 x i16> %in to <4 x float>
  %shift = fmul <4 x float> %conv, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
  ret <4 x float> %shift
}
270
; Unsigned 64-bit lanes converted to float and multiplied by a splat of 0.5.
; No fixed-point vcvt is expected here: each lane is converted through a call
; to ___floatundisf, and the multiply stays as a vmul.f32 by 0.5 (0x3f000000).
define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float_mul(<2 x i64> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_i64_to_float_mul:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    push {lr}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vorr q4, q0, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ___floatundisf
; CHECK-NEXT:    vmov r2, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov.i32 d8, #0x3f000000
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl ___floatundisf
; CHECK-NEXT:    vmov s18, r0
; CHECK-NEXT:    vmul.f32 d0, d9, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {lr}
; CHECK-NEXT:    bx lr
  %conv = uitofp <2 x i64> %in to <2 x float>
  %shift = fmul <2 x float> %conv, <float 5.000000e-01, float 5.000000e-01>
  ret <2 x float> %shift
}
293
; Unsigned 64-bit lanes converted to double and multiplied by a splat of 0.5.
; No fixed-point vcvt is expected here: each lane is converted through a call
; to ___floatundidf, and the multiply stays as one vmul.f64 by 0.5 per lane.
define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double_mul(<2 x i64> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_i64_to_double_mul:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    push {lr}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vorr q4, q0, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl ___floatundidf
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    vmov.f64 d8, #5.000000e-01
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl ___floatundidf
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmul.f64 d1, d9, d8
; CHECK-NEXT:    vmul.f64 d0, d16, d8
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {lr}
; CHECK-NEXT:    bx lr
  %conv = uitofp <2 x i64> %in to <2 x double>
  %shift = fmul <2 x double> %conv, <double 5.000000e-01, double 5.000000e-01>
  ret <2 x double> %shift
}
318
; Multiply form of the 8-lane test.
; The <8 x i32> input occupies q0 and q1; the expected output applies a
; fixed-point vcvt.f32.s32 with #3 to each half.
define arm_aapcs_vfpcc <8 x float> @test7_mul(<8 x i32> %in) local_unnamed_addr #0 {
; CHECK-LABEL: test7_mul:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT:    vcvt.f32.s32 q1, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <8 x i32> %in to <8 x float>
  %div.i = fmul <8 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01>
  ret <8 x float> %div.i
}
330
; Multiply form of the 3-lane test.
; <3 x i32> input multiplied by a splat of 0.25 (2^-2): the expected fold uses
; the quadword form vcvt.f32.s32 q0, q0, #2.
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp_mul(<3 x i32> %in) local_unnamed_addr #0 {
; CHECK-LABEL: test_illegal_int_to_fp_mul:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #2
; CHECK-NEXT:    bx lr
  %conv = sitofp <3 x i32> %in to <3 x float>
  %res = fmul <3 x float> %conv, <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>
  ret <3 x float> %res
}
340