xref: /llvm-project/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
3; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
4
; Intrinsic declarations used by the tests below:
;   llvm.umax.i16 is called by @fmul_pow_mul_max_pow2,
;   llvm.umin.i64 is called by @fmul_fly_pow_mul_min_pow2,
;   llvm.ldexp.v4f32.v4i32 is called by @fmul_pow2_ldexp_4xfloat.
5declare i16 @llvm.umax.i16(i16, i16)
6declare i64 @llvm.umin.i64(i64, i64)
7
8declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
9
; Computes 9.0 * uitofp(1 << %i) independently for each of the four i32 lanes.
; The expected output for both RUN configurations still contains the shift,
; the unsigned int-to-float conversion and the fmul; with -neon the vector is
; handled as four scalar lanes passed in w0-w3 and returned in s0-s3.
10define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
11; CHECK-NEON-LABEL: fmul_pow2_4xfloat:
12; CHECK-NEON:       // %bb.0:
13; CHECK-NEON-NEXT:    movi v1.4s, #1
14; CHECK-NEON-NEXT:    ushl v0.4s, v1.4s, v0.4s
15; CHECK-NEON-NEXT:    fmov v1.4s, #9.00000000
16; CHECK-NEON-NEXT:    ucvtf v0.4s, v0.4s
17; CHECK-NEON-NEXT:    fmul v0.4s, v0.4s, v1.4s
18; CHECK-NEON-NEXT:    ret
19;
20; CHECK-NO-NEON-LABEL: fmul_pow2_4xfloat:
21; CHECK-NO-NEON:       // %bb.0:
22; CHECK-NO-NEON-NEXT:    mov w8, #1 // =0x1
23; CHECK-NO-NEON-NEXT:    fmov s3, #9.00000000
24; CHECK-NO-NEON-NEXT:    lsl w9, w8, w0
25; CHECK-NO-NEON-NEXT:    lsl w10, w8, w1
26; CHECK-NO-NEON-NEXT:    lsl w11, w8, w2
27; CHECK-NO-NEON-NEXT:    lsl w8, w8, w3
28; CHECK-NO-NEON-NEXT:    ucvtf s1, w10
29; CHECK-NO-NEON-NEXT:    ucvtf s0, w9
30; CHECK-NO-NEON-NEXT:    ucvtf s2, w11
31; CHECK-NO-NEON-NEXT:    ucvtf s4, w8
32; CHECK-NO-NEON-NEXT:    fmul s0, s0, s3
33; CHECK-NO-NEON-NEXT:    fmul s1, s1, s3
34; CHECK-NO-NEON-NEXT:    fmul s2, s2, s3
35; CHECK-NO-NEON-NEXT:    fmul s3, s4, s3
36; CHECK-NO-NEON-NEXT:    ret
37  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
38  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
39  %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
40  ret <4 x float> %r
41}
42
; Calls the llvm.ldexp.v4f32.v4i32 intrinsic with a splat of 9.0 as the value
; operand and %i as the exponent operand.
; The expected output expands the vector intrinsic into four scalar calls to
; ldexpf (one per lane), each with 9.0 in s0 and the lane's exponent in w0.
; Intermediate results are kept across the calls via stack spills (NEON) or
; callee-saved registers s8-s10 / w19-w21 (no NEON).
43define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
44; CHECK-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
45; CHECK-NEON:       // %bb.0:
46; CHECK-NEON-NEXT:    sub sp, sp, #48
47; CHECK-NEON-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
48; CHECK-NEON-NEXT:    .cfi_def_cfa_offset 48
49; CHECK-NEON-NEXT:    .cfi_offset w30, -16
50; CHECK-NEON-NEXT:    mov w0, v0.s[1]
51; CHECK-NEON-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
52; CHECK-NEON-NEXT:    fmov s0, #9.00000000
53; CHECK-NEON-NEXT:    bl ldexpf
54; CHECK-NEON-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
55; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
56; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
57; CHECK-NEON-NEXT:    fmov s0, #9.00000000
58; CHECK-NEON-NEXT:    fmov w0, s1
59; CHECK-NEON-NEXT:    bl ldexpf
60; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
61; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
62; CHECK-NEON-NEXT:    mov v0.s[1], v1.s[0]
63; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
64; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
65; CHECK-NEON-NEXT:    mov w0, v0.s[2]
66; CHECK-NEON-NEXT:    fmov s0, #9.00000000
67; CHECK-NEON-NEXT:    bl ldexpf
68; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
69; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
70; CHECK-NEON-NEXT:    mov v1.s[2], v0.s[0]
71; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
72; CHECK-NEON-NEXT:    mov w0, v0.s[3]
73; CHECK-NEON-NEXT:    fmov s0, #9.00000000
74; CHECK-NEON-NEXT:    str q1, [sp] // 16-byte Folded Spill
75; CHECK-NEON-NEXT:    bl ldexpf
76; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
77; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
78; CHECK-NEON-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
79; CHECK-NEON-NEXT:    mov v1.s[3], v0.s[0]
80; CHECK-NEON-NEXT:    mov v0.16b, v1.16b
81; CHECK-NEON-NEXT:    add sp, sp, #48
82; CHECK-NEON-NEXT:    ret
83;
84; CHECK-NO-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
85; CHECK-NO-NEON:       // %bb.0:
86; CHECK-NO-NEON-NEXT:    str d10, [sp, #-64]! // 8-byte Folded Spill
87; CHECK-NO-NEON-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
88; CHECK-NO-NEON-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
89; CHECK-NO-NEON-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
90; CHECK-NO-NEON-NEXT:    .cfi_def_cfa_offset 64
91; CHECK-NO-NEON-NEXT:    .cfi_offset w19, -8
92; CHECK-NO-NEON-NEXT:    .cfi_offset w20, -16
93; CHECK-NO-NEON-NEXT:    .cfi_offset w21, -24
94; CHECK-NO-NEON-NEXT:    .cfi_offset w30, -32
95; CHECK-NO-NEON-NEXT:    .cfi_offset b8, -40
96; CHECK-NO-NEON-NEXT:    .cfi_offset b9, -48
97; CHECK-NO-NEON-NEXT:    .cfi_offset b10, -64
98; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
99; CHECK-NO-NEON-NEXT:    mov w19, w3
100; CHECK-NO-NEON-NEXT:    mov w20, w2
101; CHECK-NO-NEON-NEXT:    mov w21, w1
102; CHECK-NO-NEON-NEXT:    bl ldexpf
103; CHECK-NO-NEON-NEXT:    fmov s8, s0
104; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
105; CHECK-NO-NEON-NEXT:    mov w0, w21
106; CHECK-NO-NEON-NEXT:    bl ldexpf
107; CHECK-NO-NEON-NEXT:    fmov s9, s0
108; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
109; CHECK-NO-NEON-NEXT:    mov w0, w20
110; CHECK-NO-NEON-NEXT:    bl ldexpf
111; CHECK-NO-NEON-NEXT:    fmov s10, s0
112; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
113; CHECK-NO-NEON-NEXT:    mov w0, w19
114; CHECK-NO-NEON-NEXT:    bl ldexpf
115; CHECK-NO-NEON-NEXT:    fmov s3, s0
116; CHECK-NO-NEON-NEXT:    fmov s0, s8
117; CHECK-NO-NEON-NEXT:    fmov s1, s9
118; CHECK-NO-NEON-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
119; CHECK-NO-NEON-NEXT:    fmov s2, s10
120; CHECK-NO-NEON-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
121; CHECK-NO-NEON-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
122; CHECK-NO-NEON-NEXT:    ldr d10, [sp], #64 // 8-byte Folded Reload
123; CHECK-NO-NEON-NEXT:    ret
124  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
125  ret <4 x float> %r
126}
127
; Computes 9.0 / uitofp(1 << %i) independently for each of the four i32 lanes.
; The expected output contains no shift-by-variable, conversion or fdiv:
; each lane of %i is shifted left by 23 and subtracted, as an integer, from the
; bit pattern of 9.0 (0x41100000 in the no-NEON checks).
128define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
129; CHECK-NEON-LABEL: fdiv_pow2_4xfloat:
130; CHECK-NEON:       // %bb.0:
131; CHECK-NEON-NEXT:    fmov v1.4s, #9.00000000
132; CHECK-NEON-NEXT:    shl v0.4s, v0.4s, #23
133; CHECK-NEON-NEXT:    sub v0.4s, v1.4s, v0.4s
134; CHECK-NEON-NEXT:    ret
135;
136; CHECK-NO-NEON-LABEL: fdiv_pow2_4xfloat:
137; CHECK-NO-NEON:       // %bb.0:
138; CHECK-NO-NEON-NEXT:    mov w8, #1091567616 // =0x41100000
139; CHECK-NO-NEON-NEXT:    sub w9, w8, w0, lsl #23
140; CHECK-NO-NEON-NEXT:    sub w10, w8, w1, lsl #23
141; CHECK-NO-NEON-NEXT:    sub w11, w8, w2, lsl #23
142; CHECK-NO-NEON-NEXT:    sub w8, w8, w3, lsl #23
143; CHECK-NO-NEON-NEXT:    fmov s0, w9
144; CHECK-NO-NEON-NEXT:    fmov s1, w10
145; CHECK-NO-NEON-NEXT:    fmov s2, w11
146; CHECK-NO-NEON-NEXT:    fmov s3, w8
147; CHECK-NO-NEON-NEXT:    ret
148  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
149  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
150  %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
151  ret <4 x float> %r
152}
153
; Scalar case: 9.0 * uitofp(1 << %cnt) with an i64 shift (nuw) and a double
; result. The expected output (shared CHECK prefix) keeps the lsl, ucvtf and
; fmul instructions.
154define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
155; CHECK-LABEL: fmul_pow_shl_cnt:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    mov w8, #1 // =0x1
158; CHECK-NEXT:    fmov d1, #9.00000000
159; CHECK-NEXT:    lsl x8, x8, x0
160; CHECK-NEXT:    ucvtf d0, x8
161; CHECK-NEXT:    fmul d0, d0, d1
162; CHECK-NEXT:    ret
163  %shl = shl nuw i64 1, %cnt
164  %conv = uitofp i64 %shl to double
165  %mul = fmul double 9.000000e+00, %conv
166  ret double %mul
167}
168
; Variant of the scalar fmul case with a shift base of 2 and a negative
; multiplier: -9.0 * uitofp(2 << %cnt). The expected output keeps the lsl,
; ucvtf and fmul instructions.
169define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
170; CHECK-LABEL: fmul_pow_shl_cnt2:
171; CHECK:       // %bb.0:
172; CHECK-NEXT:    mov w8, #2 // =0x2
173; CHECK-NEXT:    fmov d1, #-9.00000000
174; CHECK-NEXT:    lsl x8, x8, x0
175; CHECK-NEXT:    ucvtf d0, x8
176; CHECK-NEXT:    fmul d0, d0, d1
177; CHECK-NEXT:    ret
178  %shl = shl nuw i64 2, %cnt
179  %conv = uitofp i64 %shl to double
180  %mul = fmul double -9.000000e+00, %conv
181  ret double %mul
182}
183
; The integer operand is a select between two shifted constants:
; %c ? (1 << %cnt) : (2 << %cnt), converted to float and multiplied by 9.0.
; In the expected output the select is applied to the shift base (mov #1 plus
; cinc on the tested bit of w1) followed by a single lsl, then ucvtf and fmul.
184define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
185; CHECK-LABEL: fmul_pow_select:
186; CHECK:       // %bb.0:
187; CHECK-NEXT:    mov w8, #1 // =0x1
188; CHECK-NEXT:    tst w1, #0x1
189; CHECK-NEXT:    fmov s1, #9.00000000
190; CHECK-NEXT:    cinc w8, w8, eq
191; CHECK-NEXT:    lsl w8, w8, w0
192; CHECK-NEXT:    ucvtf s0, w8
193; CHECK-NEXT:    fmul s0, s0, s1
194; CHECK-NEXT:    ret
195  %shl2 = shl nuw i32 2, %cnt
196  %shl1 = shl nuw i32 1, %cnt
197  %shl = select i1 %c, i32 %shl1, i32 %shl2
198  %conv = uitofp i32 %shl to float
199  %mul = fmul float 9.000000e+00, %conv
200  ret float %mul
201}
202
; The integer operand is umin(8 << %cnt, 8192), i.e. the unsigned minimum of a
; shifted constant and a constant; it is converted to float and multiplied by
; 9.0. The expected output keeps the lsl, the cmp/csel implementing the umin,
; the ucvtf and the fmul.
203define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
204; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
205; CHECK:       // %bb.0:
206; CHECK-NEXT:    mov w8, #8 // =0x8
207; CHECK-NEXT:    mov w9, #8192 // =0x2000
208; CHECK-NEXT:    fmov s1, #9.00000000
209; CHECK-NEXT:    lsl x8, x8, x0
210; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
211; CHECK-NEXT:    csel x8, x8, x9, lo
212; CHECK-NEXT:    ucvtf s0, x8
213; CHECK-NEXT:    fmul s0, s0, s1
214; CHECK-NEXT:    ret
215  %shl8 = shl nuw i64 8, %cnt
216  %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
217  %conv = uitofp i64 %shl to float
218  %mul = fmul float 9.000000e+00, %conv
219  ret float %mul
220}
221
; The integer operand is umax(1 << %cnt, 2 << %cnt) computed in i16, converted
; to double and multiplied by 3.0. The expected output performs both shifts in
; 32-bit registers, masks each result to 16 bits, selects the larger with
; cmp/csel, and then does the ucvtf and fmul.
222define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
223; CHECK-LABEL: fmul_pow_mul_max_pow2:
224; CHECK:       // %bb.0:
225; CHECK-NEXT:    mov w8, #2 // =0x2
226; CHECK-NEXT:    mov w9, #1 // =0x1
227; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
228; CHECK-NEXT:    fmov d1, #3.00000000
229; CHECK-NEXT:    lsl w8, w8, w0
230; CHECK-NEXT:    lsl w9, w9, w0
231; CHECK-NEXT:    and w8, w8, #0xfffe
232; CHECK-NEXT:    and w9, w9, #0xffff
233; CHECK-NEXT:    cmp w9, w8
234; CHECK-NEXT:    csel w8, w9, w8, hi
235; CHECK-NEXT:    ucvtf d0, w8
236; CHECK-NEXT:    fmul d0, d0, d1
237; CHECK-NEXT:    ret
238  %shl2 = shl nuw i16 2, %cnt
239  %shl1 = shl nuw i16 1, %cnt
240  %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
241  %conv = uitofp i16 %shl to double
242  %mul = fmul double 3.000000e+00, %conv
243  ret double %mul
244}
245
; Negative case: the shifted value is the function argument %v rather than a
; constant, so the integer operand (%v << %cnt) is not known to be a power of
; two. The expected output keeps the lsl, ucvtf and fmul.
246define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
247; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
248; CHECK:       // %bb.0:
249; CHECK-NEXT:    lsl x8, x0, x1
250; CHECK-NEXT:    fmov d1, #9.00000000
251; CHECK-NEXT:    ucvtf d0, x8
252; CHECK-NEXT:    fmul d0, d0, d1
253; CHECK-NEXT:    ret
254  %shl = shl nuw i64 %v, %cnt
255  %conv = uitofp i64 %shl to double
256  %mul = fmul double 9.000000e+00, %conv
257  ret double %mul
258}
259
; Vector case where the integer and floating-point element widths differ:
; (2 << %cnt) is computed in <2 x i64>, converted to <2 x float> and multiplied
; by a splat of 15.0. The expected NEON output converts in 64-bit lanes
; (ucvtf .2d), narrows with fcvtn and then multiplies; the no-NEON output does
; two scalar lsl/ucvtf/fmul sequences.
260define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
261; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
262; CHECK-NEON:       // %bb.0:
263; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
264; CHECK-NEON-NEXT:    dup v1.2d, x8
265; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
266; CHECK-NEON-NEXT:    fmov v1.2s, #15.00000000
267; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
268; CHECK-NEON-NEXT:    fcvtn v0.2s, v0.2d
269; CHECK-NEON-NEXT:    fmul v0.2s, v0.2s, v1.2s
270; CHECK-NEON-NEXT:    ret
271;
272; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
273; CHECK-NO-NEON:       // %bb.0:
274; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
275; CHECK-NO-NEON-NEXT:    fmov s2, #15.00000000
276; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
277; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
278; CHECK-NO-NEON-NEXT:    ucvtf s1, x8
279; CHECK-NO-NEON-NEXT:    ucvtf s0, x9
280; CHECK-NO-NEON-NEXT:    fmul s0, s0, s2
281; CHECK-NO-NEON-NEXT:    fmul s1, s1, s2
282; CHECK-NO-NEON-NEXT:    ret
283  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
284  %conv = uitofp <2 x i64> %shl to <2 x float>
285  %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
286  ret <2 x float> %mul
287}
288
; Vector case with matching element widths: splat(15.0) * uitofp(2 << %cnt)
; on <2 x i64> / <2 x double>. The expected output keeps the shift, ucvtf and
; fmul (vector instructions with NEON, two scalar sequences without).
289define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
290; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec:
291; CHECK-NEON:       // %bb.0:
292; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
293; CHECK-NEON-NEXT:    dup v1.2d, x8
294; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
295; CHECK-NEON-NEXT:    fmov v1.2d, #15.00000000
296; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
297; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
298; CHECK-NEON-NEXT:    ret
299;
300; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec:
301; CHECK-NO-NEON:       // %bb.0:
302; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
303; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
304; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
305; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
306; CHECK-NO-NEON-NEXT:    ucvtf d1, x8
307; CHECK-NO-NEON-NEXT:    ucvtf d0, x9
308; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
309; CHECK-NO-NEON-NEXT:    fmul d1, d1, d2
310; CHECK-NO-NEON-NEXT:    ret
311  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
312  %conv = uitofp <2 x i64> %shl to <2 x double>
313  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
314  ret <2 x double> %mul
315}
316
; The fmul result (splat(5.0) * uitofp(2 << %cnt)) has an fadd with %add as its
; user. The expected output keeps the shift, ucvtf and fmul and emits the fadd
; as a separate instruction after the fmul in both configurations.
; NOTE(review): the name suggests the intent is to leave the fmul in place so
; it could be combined with the fadd - confirm against the combine's source.
317define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
318; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
319; CHECK-NEON:       // %bb.0:
320; CHECK-NEON-NEXT:    movi v2.4s, #2
321; CHECK-NEON-NEXT:    ushl v0.4s, v2.4s, v0.4s
322; CHECK-NEON-NEXT:    fmov v2.4s, #5.00000000
323; CHECK-NEON-NEXT:    ucvtf v0.4s, v0.4s
324; CHECK-NEON-NEXT:    fmul v0.4s, v0.4s, v2.4s
325; CHECK-NEON-NEXT:    fadd v0.4s, v0.4s, v1.4s
326; CHECK-NEON-NEXT:    ret
327;
328; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
329; CHECK-NO-NEON:       // %bb.0:
330; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
331; CHECK-NO-NEON-NEXT:    fmov s16, #5.00000000
332; CHECK-NO-NEON-NEXT:    lsl w9, w8, w3
333; CHECK-NO-NEON-NEXT:    lsl w10, w8, w0
334; CHECK-NO-NEON-NEXT:    lsl w11, w8, w2
335; CHECK-NO-NEON-NEXT:    lsl w8, w8, w1
336; CHECK-NO-NEON-NEXT:    ucvtf s4, w10
337; CHECK-NO-NEON-NEXT:    ucvtf s5, w9
338; CHECK-NO-NEON-NEXT:    ucvtf s7, w11
339; CHECK-NO-NEON-NEXT:    ucvtf s6, w8
340; CHECK-NO-NEON-NEXT:    fmul s5, s5, s16
341; CHECK-NO-NEON-NEXT:    fmul s4, s4, s16
342; CHECK-NO-NEON-NEXT:    fmul s7, s7, s16
343; CHECK-NO-NEON-NEXT:    fmul s6, s6, s16
344; CHECK-NO-NEON-NEXT:    fadd s0, s4, s0
345; CHECK-NO-NEON-NEXT:    fadd s3, s5, s3
346; CHECK-NO-NEON-NEXT:    fadd s1, s6, s1
347; CHECK-NO-NEON-NEXT:    fadd s2, s7, s2
348; CHECK-NO-NEON-NEXT:    ret
349  %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
350  %conv = uitofp <4 x i32> %shl to <4 x float>
351  %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
352  %res = fadd <4 x float> %mul, %add
353  ret <4 x float> %res
354}
355
; Vector case where the floating-point multiplier is not a splat:
; <15.0, 14.0> * uitofp(<2, 2> << %cnt). The expected NEON output loads the
; multiplier vector from a constant pool entry and keeps ushl/ucvtf/fmul; the
; no-NEON output materializes 15.0 and 14.0 separately for the two lanes.
356define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
357; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
358; CHECK-NEON:       // %bb.0:
359; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
360; CHECK-NEON-NEXT:    dup v1.2d, x8
361; CHECK-NEON-NEXT:    adrp x8, .LCPI12_0
362; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
363; CHECK-NEON-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
364; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
365; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
366; CHECK-NEON-NEXT:    ret
367;
368; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
369; CHECK-NO-NEON:       // %bb.0:
370; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
371; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
372; CHECK-NO-NEON-NEXT:    fmov d3, #14.00000000
373; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
374; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
375; CHECK-NO-NEON-NEXT:    ucvtf d1, x8
376; CHECK-NO-NEON-NEXT:    ucvtf d0, x9
377; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
378; CHECK-NO-NEON-NEXT:    fmul d1, d1, d3
379; CHECK-NO-NEON-NEXT:    ret
380  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
381  %conv = uitofp <2 x i64> %shl to <2 x double>
382  %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
383  ret <2 x double> %mul
384}
385
; Vector case where the integer shift base is not a splat:
; splat(15.0) * uitofp(<2, 1> << %cnt). The expected NEON output loads the base
; vector from a constant pool entry and keeps ushl/ucvtf/fmul; the no-NEON
; output uses separate base constants 2 and 1 for the two lanes.
386define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
387; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
388; CHECK-NEON:       // %bb.0:
389; CHECK-NEON-NEXT:    adrp x8, .LCPI13_0
390; CHECK-NEON-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
391; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
392; CHECK-NEON-NEXT:    fmov v1.2d, #15.00000000
393; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
394; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
395; CHECK-NEON-NEXT:    ret
396;
397; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
398; CHECK-NO-NEON:       // %bb.0:
399; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
400; CHECK-NO-NEON-NEXT:    mov w9, #1 // =0x1
401; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
402; CHECK-NO-NEON-NEXT:    lsl x8, x8, x0
403; CHECK-NO-NEON-NEXT:    lsl x9, x9, x1
404; CHECK-NO-NEON-NEXT:    ucvtf d1, x9
405; CHECK-NO-NEON-NEXT:    ucvtf d0, x8
406; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
407; CHECK-NO-NEON-NEXT:    fmul d1, d1, d2
408; CHECK-NO-NEON-NEXT:    ret
409  %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
410  %conv = uitofp <2 x i64> %shl to <2 x double>
411  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
412  ret <2 x double> %mul
413}
414
415
; Negative case with a very large multiplier: 9.745314e+288 * uitofp(1 << %cnt)
; where %cnt is an i64. The expected output keeps the lsl, ucvtf and fmul, with
; the multiplier loaded from a constant pool entry.
; NOTE(review): per the name, the fold is presumably rejected because adding
; the shift amount to the constant's exponent could leave the valid exponent
; range - confirm against the combine's source.
416define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
417; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
418; CHECK:       // %bb.0:
419; CHECK-NEXT:    mov w8, #1 // =0x1
420; CHECK-NEXT:    lsl x8, x8, x0
421; CHECK-NEXT:    ucvtf d0, x8
422; CHECK-NEXT:    adrp x8, .LCPI14_0
423; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
424; CHECK-NEXT:    fmul d0, d0, d1
425; CHECK-NEXT:    ret
426  %shl = shl nuw i64 1, %cnt
427  %conv = uitofp i64 %shl to double
428  %mul = fmul double 9.745314e+288, %conv
429  ret double %mul
430}
431
; Same large multiplier as @fmul_pow_shl_cnt_fail_maybe_bad_exp, but the shift
; is performed in i16, which bounds the integer operand to 16 bits:
; 9.745314e+288 * uitofp(i16 (1 << %cnt)).
; The expected output shifts in a 32-bit register, masks to 16 bits, and then
; does the ucvtf and an fmul with the constant loaded from the constant pool.
; NOTE(review): the name says "safe", yet the current checks still contain the
; convert and fmul - confirm whether a fold is expected here.
432define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
433; CHECK-LABEL: fmul_pow_shl_cnt_safe:
434; CHECK:       // %bb.0:
435; CHECK-NEXT:    mov w8, #1 // =0x1
436; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
437; CHECK-NEXT:    lsl w8, w8, w0
438; CHECK-NEXT:    and w8, w8, #0xffff
439; CHECK-NEXT:    ucvtf d0, w8
440; CHECK-NEXT:    adrp x8, .LCPI15_0
441; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI15_0]
442; CHECK-NEXT:    fmul d0, d0, d1
443; CHECK-NEXT:    ret
444  %shl = shl nuw i16 1, %cnt
445  %conv = uitofp i16 %shl to double
446  %mul = fmul double 9.745314e+288, %conv
447  ret double %mul
448}
449
; Vector fdiv case: splat(1.0) / uitofp(1 << %cnt) on <2 x i64> / <2 x double>.
; The expected output contains no conversion or fdiv: %cnt is shifted left by
; 52 and subtracted, as an integer, from the bit pattern of 1.0
; (0x3ff0000000000000 in the no-NEON checks).
450define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
451; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec:
452; CHECK-NEON:       // %bb.0:
453; CHECK-NEON-NEXT:    fmov v1.2d, #1.00000000
454; CHECK-NEON-NEXT:    shl v0.2d, v0.2d, #52
455; CHECK-NEON-NEXT:    sub v0.2d, v1.2d, v0.2d
456; CHECK-NEON-NEXT:    ret
457;
458; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec:
459; CHECK-NO-NEON:       // %bb.0:
460; CHECK-NO-NEON-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
461; CHECK-NO-NEON-NEXT:    sub x9, x8, x0, lsl #52
462; CHECK-NO-NEON-NEXT:    sub x8, x8, x1, lsl #52
463; CHECK-NO-NEON-NEXT:    fmov d0, x9
464; CHECK-NO-NEON-NEXT:    fmov d1, x8
465; CHECK-NO-NEON-NEXT:    ret
466  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
467  %conv = uitofp <2 x i64> %shl to <2 x double>
468  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
469  ret <2 x double> %mul
470}
471
; Vector fdiv case with differing element widths: splat(1.0) /
; uitofp(1 << %cnt), with the shift in <2 x i64> and the result in <2 x float>.
; The expected output contains no conversion or fdiv: the count is narrowed to
; 32 bits (xtn with NEON, w-register views without), shifted left by 23 and
; subtracted from the bit pattern of 1.0f (0x3f800000 in the no-NEON checks).
472define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
473; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
474; CHECK-NEON:       // %bb.0:
475; CHECK-NEON-NEXT:    xtn v0.2s, v0.2d
476; CHECK-NEON-NEXT:    fmov v1.2s, #1.00000000
477; CHECK-NEON-NEXT:    shl v0.2s, v0.2s, #23
478; CHECK-NEON-NEXT:    sub v0.2s, v1.2s, v0.2s
479; CHECK-NEON-NEXT:    ret
480;
481; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
482; CHECK-NO-NEON:       // %bb.0:
483; CHECK-NO-NEON-NEXT:    mov w8, #1065353216 // =0x3f800000
484; CHECK-NO-NEON-NEXT:    sub w9, w8, w0, lsl #23
485; CHECK-NO-NEON-NEXT:    sub w8, w8, w1, lsl #23
486; CHECK-NO-NEON-NEXT:    fmov s0, w9
487; CHECK-NO-NEON-NEXT:    fmov s1, w8
488; CHECK-NO-NEON-NEXT:    ret
489  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
490  %conv = uitofp <2 x i64> %shl to <2 x float>
491  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
492  ret <2 x float> %mul
493}
494
; Negative fdiv case: -9.0 / uitofp(8 << %cnt) where the shl carries no nuw
; flag. The expected output keeps the lsl, ucvtf and fdiv.
; NOTE(review): per the name, the fold is presumably rejected because without
; nuw the shifted value may be zero - confirm against the combine's source.
495define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
496; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
497; CHECK:       // %bb.0:
498; CHECK-NEXT:    mov w8, #8 // =0x8
499; CHECK-NEXT:    fmov s1, #-9.00000000
500; CHECK-NEXT:    lsl x8, x8, x0
501; CHECK-NEXT:    ucvtf s0, x8
502; CHECK-NEXT:    fdiv s0, s1, s0
503; CHECK-NEXT:    ret
504  %shl = shl i64 8, %cnt
505  %conv = uitofp i64 %shl to float
506  %mul = fdiv float -9.000000e+00, %conv
507  ret float %mul
508}
509
; Negative fdiv case using a signed conversion: -9.0 / sitofp(8 << %cnt), with
; no flags on the shl and no bound on %cnt. The expected output keeps the lsl,
; the signed conversion (scvtf) and the fdiv.
; NOTE(review): per the name, the concern is presumably that the shifted value
; may be negative when interpreted as signed - confirm.
510define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
511; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
512; CHECK:       // %bb.0:
513; CHECK-NEXT:    mov w8, #8 // =0x8
514; CHECK-NEXT:    fmov s1, #-9.00000000
515; CHECK-NEXT:    lsl x8, x8, x0
516; CHECK-NEXT:    scvtf s0, x8
517; CHECK-NEXT:    fdiv s0, s1, s0
518; CHECK-NEXT:    ret
519  %shl = shl i64 8, %cnt
520  %conv = sitofp i64 %shl to float
521  %mul = fdiv float -9.000000e+00, %conv
522  ret float %mul
523}
524
; Positive fdiv case with a bounded shift amount: %cnt = %cnt_in & 31, then
; -0.5 / sitofp(8 << %cnt). The expected output contains no shift-by-variable,
; conversion or fdiv: the masked count is shifted left by 23 and subtracted
; from the integer constant 0xbd800000, and the result is moved to s0.
525define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
526; CHECK-LABEL: fdiv_pow_shl_cnt:
527; CHECK:       // %bb.0:
528; CHECK-NEXT:    mov w8, #-1115684864 // =0xbd800000
529; CHECK-NEXT:    and w9, w0, #0x1f
530; CHECK-NEXT:    sub w8, w8, w9, lsl #23
531; CHECK-NEXT:    fmov s0, w8
532; CHECK-NEXT:    ret
533  %cnt = and i64 %cnt_in, 31
534  %shl = shl i64 8, %cnt
535  %conv = sitofp i64 %shl to float
536  %mul = fdiv float -0.500000e+00, %conv
537  ret float %mul
538}
539
; Positive fdiv case with a 32-bit shift and a double result:
; 0x36A0000000000000 (as a double) / uitofp(i32 (1 << %cnt)).
; The expected output contains no conversion or fdiv: the count is shifted left
; by 52 and subtracted from the dividend's bit pattern in a 64-bit register.
540define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
541; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
542; CHECK:       // %bb.0:
543; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
544; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
545; CHECK-NEXT:    sub x8, x8, x0, lsl #52
546; CHECK-NEXT:    fmov d0, x8
547; CHECK-NEXT:    ret
548  %shl = shl nuw i32 1, %cnt
549  %conv = uitofp i32 %shl to double
550  %mul = fdiv double 0x36A0000000000000, %conv
551  ret double %mul
552}
553
; Negative fdiv case: the float dividend is written as 0x3a1fffff00000000,
; slightly below the 0x3a20000000000000 used by @fdiv_pow_shl_cnt32_okay.
; The expected output keeps the lsl, ucvtf and fdiv, with the dividend
; materialized via mov/movk and moved into s1.
; NOTE(review): per the name, the fold is presumably rejected because the
; adjusted exponent could go out of range for some shift amounts - confirm.
554define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
555; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
556; CHECK:       // %bb.0:
557; CHECK-NEXT:    mov w8, #1 // =0x1
558; CHECK-NEXT:    lsl w8, w8, w0
559; CHECK-NEXT:    ucvtf s0, w8
560; CHECK-NEXT:    mov w8, #65528 // =0xfff8
561; CHECK-NEXT:    movk w8, #4351, lsl #16
562; CHECK-NEXT:    fmov s1, w8
563; CHECK-NEXT:    fdiv s0, s1, s0
564; CHECK-NEXT:    ret
565  %shl = shl nuw i32 1, %cnt
566  %conv = uitofp i32 %shl to float
567  %mul = fdiv float 0x3a1fffff00000000, %conv
568  ret float %mul
569}
570
; Positive counterpart of @fdiv_pow_shl_cnt32_out_of_bounds2: the float
; dividend is written as 0x3a20000000000000 and divided by
; uitofp(i32 (1 << %cnt)). The expected output contains no conversion or fdiv:
; the count is shifted left by 23 and subtracted from the integer constant
; 0x11000000, and the result is moved to s0.
571define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
572; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
573; CHECK:       // %bb.0:
574; CHECK-NEXT:    mov w8, #285212672 // =0x11000000
575; CHECK-NEXT:    sub w8, w8, w0, lsl #23
576; CHECK-NEXT:    fmov s0, w8
577; CHECK-NEXT:    ret
578  %shl = shl nuw i32 1, %cnt
579  %conv = uitofp i32 %shl to float
580  %mul = fdiv float 0x3a20000000000000, %conv
581  ret float %mul
582}
583
; Longer chain around the fdiv pattern: the i32 argument is zero-extended to
; i64 and used as the shift amount of (1 << x); the result is converted to
; double, 1.0 is divided by it, the quotient is truncated to float and compared
; with "0.0 < quotient" (fcmp olt).
; In the expected output the fdiv is replaced by subtracting (x << 52) from the
; bit pattern of 1.0 (0x3ff0000000000000); the fcvt, fcmp and cset remain.
584define fastcc i1 @quantum_hadamard(i32 %0) {
585; CHECK-LABEL: quantum_hadamard:
586; CHECK:       // %bb.0:
587; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
588; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
589; CHECK-NEXT:    sub x8, x8, x0, lsl #52
590; CHECK-NEXT:    fmov d0, x8
591; CHECK-NEXT:    fcvt s0, d0
592; CHECK-NEXT:    fcmp s0, #0.0
593; CHECK-NEXT:    cset w0, gt
594; CHECK-NEXT:    ret
595  %2 = zext i32 %0 to i64
596  %3 = shl i64 1, %2
597  %4 = uitofp i64 %3 to double
598  %5 = fdiv double 1.000000e+00, %4
599  %6 = fptrunc double %5 to float
600  %7 = fcmp olt float 0.000000e+00, %6
601  ret i1 %7
602}
603
; Scalable-vector version of the fdiv pattern, compiled with the "+sve"
; function attribute: splat(9.0) / uitofp(splat(1) << %i) on <vscale x 4 x i32>.
; The expected output (same for both RUN lines, shared CHECK prefix) keeps the
; predicated shift (lslr), ucvtf and reversed divide (fdivr).
604define <vscale x 4 x float> @fdiv_pow2_nx4xfloat(<vscale x 4 x i32> %i) "target-features"="+sve" {
605; CHECK-LABEL: fdiv_pow2_nx4xfloat:
606; CHECK:       // %bb.0:
607; CHECK-NEXT:    mov z1.s, #1 // =0x1
608; CHECK-NEXT:    ptrue p0.s
609; CHECK-NEXT:    lslr z0.s, p0/m, z0.s, z1.s
610; CHECK-NEXT:    fmov z1.s, #9.00000000
611; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
612; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z1.s
613; CHECK-NEXT:    ret
614  %p2 = shl <vscale x 4 x i32> splat (i32 1), %i
615  %p2_f = uitofp <vscale x 4 x i32> %p2 to <vscale x 4 x float>
616  %r = fdiv <vscale x 4 x float> splat (float 9.000000e+00), %p2_f
617  ret <vscale x 4 x float> %r
618}
619
; Scalable-vector fdiv with no shift at all: splat(1.0) / uitofp(%0) on
; <vscale x 2 x i64>, compiled with the "+sve" function attribute. The integer
; operand is the raw argument, so nothing marks it as a power of two.
; The expected output is a predicated ucvtf followed by fdivr.
620define <vscale x 2 x double> @scalable2(<vscale x 2 x i64> %0) "target-features"="+sve" {
621; CHECK-LABEL: scalable2:
622; CHECK:       // %bb.0:
623; CHECK-NEXT:    ptrue p0.d
624; CHECK-NEXT:    fmov z1.d, #1.00000000
625; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
626; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
627; CHECK-NEXT:    ret
628  %2 = uitofp <vscale x 2 x i64> %0 to <vscale x 2 x double>
629  %3 = fdiv <vscale x 2 x double> splat (double 1.000000e+00), %2
630  ret <vscale x 2 x double> %3
631}
632