; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON

; Codegen tests for the pattern "FP constant (fmul|fdiv) int-to-fp(pow2 << cnt)".
; Each function is compiled twice by the two llc invocations above, once with
; NEON enabled and once with it disabled. Functions whose output is the same in
; both configurations use the shared prefix; the others carry one set of
; assertions per configuration.
;
; Where the recorded assembly shows the operation folded, the FP multiply or
; divide is replaced by integer arithmetic on the bit pattern of the constant
; (the count is shifted into the exponent field and subtracted). Where it shows
; the operation left alone, the shift, int-to-fp conversion and FP operation
; all remain visible.
;
; The assertion lines are generated; regenerate them with the script named on
; the first line rather than editing them by hand.

declare i16 @llvm.umax.i16(i16, i16)
declare i64 @llvm.umin.i64(i64, i64)

declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)

; <4 x float> 9.0 * uitofp(1 << %i). The recorded output for both
; configurations still contains the shift, the ucvtf and the fmul.
define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
; CHECK-NEON-LABEL: fmul_pow2_4xfloat:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    movi v1.4s, #1
; CHECK-NEON-NEXT:    ushl v0.4s, v1.4s, v0.4s
; CHECK-NEON-NEXT:    fmov v1.4s, #9.00000000
; CHECK-NEON-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEON-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow2_4xfloat:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #1 // =0x1
; CHECK-NO-NEON-NEXT:    fmov s3, #9.00000000
; CHECK-NO-NEON-NEXT:    lsl w9, w8, w0
; CHECK-NO-NEON-NEXT:    lsl w10, w8, w1
; CHECK-NO-NEON-NEXT:    lsl w11, w8, w2
; CHECK-NO-NEON-NEXT:    lsl w8, w8, w3
; CHECK-NO-NEON-NEXT:    ucvtf s1, w10
; CHECK-NO-NEON-NEXT:    ucvtf s0, w9
; CHECK-NO-NEON-NEXT:    ucvtf s2, w11
; CHECK-NO-NEON-NEXT:    ucvtf s4, w8
; CHECK-NO-NEON-NEXT:    fmul s0, s0, s3
; CHECK-NO-NEON-NEXT:    fmul s1, s1, s3
; CHECK-NO-NEON-NEXT:    fmul s2, s2, s3
; CHECK-NO-NEON-NEXT:    fmul s3, s4, s3
; CHECK-NO-NEON-NEXT:    ret
  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
  %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
  ret <4 x float> %r
}

; Direct call to the vector ldexp intrinsic with a splat 9.0 mantissa. The
; recorded output for both configurations is four scalar calls to ldexpf, one
; per lane. This function is not marked nounwind, so CFI directives appear in
; the expected output.
define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
; CHECK-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    sub sp, sp, #48
; CHECK-NEON-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEON-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEON-NEXT:    .cfi_offset w30, -16
; CHECK-NEON-NEXT:    mov w0, v0.s[1]
; CHECK-NEON-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NEON-NEXT:    bl ldexpf
; CHECK-NEON-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NEON-NEXT:    fmov w0, s1
; CHECK-NEON-NEXT:    bl ldexpf
; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEON-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    mov w0, v0.s[2]
; CHECK-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NEON-NEXT:    bl ldexpf
; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEON-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    mov w0, v0.s[3]
; CHECK-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NEON-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-NEON-NEXT:    bl ldexpf
; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEON-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEON-NEXT:    mov v1.s[3], v0.s[0]
; CHECK-NEON-NEXT:    mov v0.16b, v1.16b
; CHECK-NEON-NEXT:    add sp, sp, #48
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    str d10, [sp, #-64]! // 8-byte Folded Spill
; CHECK-NO-NEON-NEXT:    stp d9, d8, [sp, #16] // 16-byte Folded Spill
; CHECK-NO-NEON-NEXT:    stp x30, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NO-NEON-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NO-NEON-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NO-NEON-NEXT:    .cfi_offset w19, -8
; CHECK-NO-NEON-NEXT:    .cfi_offset w20, -16
; CHECK-NO-NEON-NEXT:    .cfi_offset w21, -24
; CHECK-NO-NEON-NEXT:    .cfi_offset w30, -32
; CHECK-NO-NEON-NEXT:    .cfi_offset b8, -40
; CHECK-NO-NEON-NEXT:    .cfi_offset b9, -48
; CHECK-NO-NEON-NEXT:    .cfi_offset b10, -64
; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NO-NEON-NEXT:    mov w19, w3
; CHECK-NO-NEON-NEXT:    mov w20, w2
; CHECK-NO-NEON-NEXT:    mov w21, w1
; CHECK-NO-NEON-NEXT:    bl ldexpf
; CHECK-NO-NEON-NEXT:    fmov s8, s0
; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NO-NEON-NEXT:    mov w0, w21
; CHECK-NO-NEON-NEXT:    bl ldexpf
; CHECK-NO-NEON-NEXT:    fmov s9, s0
; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NO-NEON-NEXT:    mov w0, w20
; CHECK-NO-NEON-NEXT:    bl ldexpf
; CHECK-NO-NEON-NEXT:    fmov s10, s0
; CHECK-NO-NEON-NEXT:    fmov s0, #9.00000000
; CHECK-NO-NEON-NEXT:    mov w0, w19
; CHECK-NO-NEON-NEXT:    bl ldexpf
; CHECK-NO-NEON-NEXT:    fmov s3, s0
; CHECK-NO-NEON-NEXT:    fmov s0, s8
; CHECK-NO-NEON-NEXT:    fmov s1, s9
; CHECK-NO-NEON-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NO-NEON-NEXT:    fmov s2, s10
; CHECK-NO-NEON-NEXT:    ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NO-NEON-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NO-NEON-NEXT:    ldr d10, [sp], #64 // 8-byte Folded Reload
; CHECK-NO-NEON-NEXT:    ret
  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
  ret <4 x float> %r
}

; <4 x float> 9.0 / uitofp(1 << %i). The recorded output contains no divide:
; the count is shifted left by 23 and subtracted from the bit pattern of 9.0
; (0x41100000 in the configuration without NEON).
define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
; CHECK-NEON-LABEL: fdiv_pow2_4xfloat:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    fmov v1.4s, #9.00000000
; CHECK-NEON-NEXT:    shl v0.4s, v0.4s, #23
; CHECK-NEON-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fdiv_pow2_4xfloat:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #1091567616 // =0x41100000
; CHECK-NO-NEON-NEXT:    sub w9, w8, w0, lsl #23
; CHECK-NO-NEON-NEXT:    sub w10, w8, w1, lsl #23
; CHECK-NO-NEON-NEXT:    sub w11, w8, w2, lsl #23
; CHECK-NO-NEON-NEXT:    sub w8, w8, w3, lsl #23
; CHECK-NO-NEON-NEXT:    fmov s0, w9
; CHECK-NO-NEON-NEXT:    fmov s1, w10
; CHECK-NO-NEON-NEXT:    fmov s2, w11
; CHECK-NO-NEON-NEXT:    fmov s3, w8
; CHECK-NO-NEON-NEXT:    ret
  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
  %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
  ret <4 x float> %r
}

; Scalar double: 9.0 * uitofp(1 << %cnt) with an i64 shift marked nuw. The
; recorded output keeps lsl, ucvtf and fmul.
define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_shl_cnt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    fmov d1, #9.00000000
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    ucvtf d0, x8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl = shl nuw i64 1, %cnt
  %conv = uitofp i64 %shl to double
  %mul = fmul double 9.000000e+00, %conv
  ret double %mul
}

; Same shape as fmul_pow_shl_cnt, but the shifted base is 2 and the FP constant
; is negative (-9.0).
define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_shl_cnt2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #2 // =0x2
; CHECK-NEXT:    fmov d1, #-9.00000000
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    ucvtf d0, x8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl = shl nuw i64 2, %cnt
  %conv = uitofp i64 %shl to double
  %mul = fmul double -9.000000e+00, %conv
  ret double %mul
}

; The integer operand is a select between (1 << %cnt) and (2 << %cnt). In the
; recorded output the select becomes a conditional increment of the shift base
; (cinc), followed by a single shift, conversion and multiply.
define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
; CHECK-LABEL: fmul_pow_select:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    tst w1, #0x1
; CHECK-NEXT:    fmov s1, #9.00000000
; CHECK-NEXT:    cinc w8, w8, eq
; CHECK-NEXT:    lsl w8, w8, w0
; CHECK-NEXT:    ucvtf s0, w8
; CHECK-NEXT:    fmul s0, s0, s1
; CHECK-NEXT:    ret
  %shl2 = shl nuw i32 2, %cnt
  %shl1 = shl nuw i32 1, %cnt
  %shl = select i1 %c, i32 %shl1, i32 %shl2
  %conv = uitofp i32 %shl to float
  %mul = fmul float 9.000000e+00, %conv
  ret float %mul
}

; The integer operand is umin(8 << %cnt, 8192), i.e. the minimum of two values
; that are each a power of two. The recorded output keeps the compare/select,
; the conversion and the multiply.
define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #8 // =0x8
; CHECK-NEXT:    mov w9, #8192 // =0x2000
; CHECK-NEXT:    fmov s1, #9.00000000
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
; CHECK-NEXT:    csel x8, x8, x9, lo
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s1
; CHECK-NEXT:    ret
  %shl8 = shl nuw i64 8, %cnt
  %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
  %conv = uitofp i64 %shl to float
  %mul = fmul float 9.000000e+00, %conv
  ret float %mul
}

; The integer operand is an i16 umax(1 << %cnt, 2 << %cnt), converted to double
; and multiplied by 3.0. The recorded output masks both shifted values to 16
; bits before the unsigned compare/select.
define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_mul_max_pow2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #2 // =0x2
; CHECK-NEXT:    mov w9, #1 // =0x1
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    fmov d1, #3.00000000
; CHECK-NEXT:    lsl w8, w8, w0
; CHECK-NEXT:    lsl w9, w9, w0
; CHECK-NEXT:    and w8, w8, #0xfffe
; CHECK-NEXT:    and w9, w9, #0xffff
; CHECK-NEXT:    cmp w9, w8
; CHECK-NEXT:    csel w8, w9, w8, hi
; CHECK-NEXT:    ucvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl2 = shl nuw i16 2, %cnt
  %shl1 = shl nuw i16 1, %cnt
  %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
  %conv = uitofp i16 %shl to double
  %mul = fmul double 3.000000e+00, %conv
  ret double %mul
}

; Negative case: the shifted value is the arbitrary argument %v rather than a
; constant, so the integer operand is not known to be a power of two. The
; recorded output keeps lsl, ucvtf and fmul.
define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl x8, x0, x1
; CHECK-NEXT:    fmov d1, #9.00000000
; CHECK-NEXT:    ucvtf d0, x8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl = shl nuw i64 %v, %cnt
  %conv = uitofp i64 %shl to double
  %mul = fmul double 9.000000e+00, %conv
  ret double %mul
}

; <2 x i64> shift converted to the narrower <2 x float>. With NEON the recorded
; output converts at 64-bit width and then narrows (ucvtf on .2d, fcvtn) before
; the multiply; the multiply is not folded in either configuration.
define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NEON-NEXT:    dup v1.2d, x8
; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
; CHECK-NEON-NEXT:    fmov v1.2s, #15.00000000
; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEON-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEON-NEXT:    fmul v0.2s, v0.2s, v1.2s
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NO-NEON-NEXT:    fmov s2, #15.00000000
; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
; CHECK-NO-NEON-NEXT:    ucvtf s1, x8
; CHECK-NO-NEON-NEXT:    ucvtf s0, x9
; CHECK-NO-NEON-NEXT:    fmul s0, s0, s2
; CHECK-NO-NEON-NEXT:    fmul s1, s1, s2
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x float>
  %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
  ret <2 x float> %mul
}

; <2 x double> 15.0 * uitofp(2 << %cnt) with splat constants on both sides. The
; recorded output keeps the shift, ucvtf and fmul.
define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NEON-NEXT:    dup v1.2d, x8
; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
; CHECK-NEON-NEXT:    fmov v1.2d, #15.00000000
; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
; CHECK-NO-NEON-NEXT:    ucvtf d1, x8
; CHECK-NO-NEON-NEXT:    ucvtf d0, x9
; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
; CHECK-NO-NEON-NEXT:    fmul d1, d1, d2
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x double>
  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
  ret <2 x double> %mul
}

; The product feeds an fadd with a second argument. The recorded output has a
; separate fmul followed by fadd in both configurations.
; NOTE(review): the name suggests the intent is to keep the multiply available
; for fusing with the add - confirm against the combine this test covers.
define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    movi v2.4s, #2
; CHECK-NEON-NEXT:    ushl v0.4s, v2.4s, v0.4s
; CHECK-NEON-NEXT:    fmov v2.4s, #5.00000000
; CHECK-NEON-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEON-NEXT:    fmul v0.4s, v0.4s, v2.4s
; CHECK-NEON-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NO-NEON-NEXT:    fmov s16, #5.00000000
; CHECK-NO-NEON-NEXT:    lsl w9, w8, w3
; CHECK-NO-NEON-NEXT:    lsl w10, w8, w0
; CHECK-NO-NEON-NEXT:    lsl w11, w8, w2
; CHECK-NO-NEON-NEXT:    lsl w8, w8, w1
; CHECK-NO-NEON-NEXT:    ucvtf s4, w10
; CHECK-NO-NEON-NEXT:    ucvtf s5, w9
; CHECK-NO-NEON-NEXT:    ucvtf s7, w11
; CHECK-NO-NEON-NEXT:    ucvtf s6, w8
; CHECK-NO-NEON-NEXT:    fmul s5, s5, s16
; CHECK-NO-NEON-NEXT:    fmul s4, s4, s16
; CHECK-NO-NEON-NEXT:    fmul s7, s7, s16
; CHECK-NO-NEON-NEXT:    fmul s6, s6, s16
; CHECK-NO-NEON-NEXT:    fadd s0, s4, s0
; CHECK-NO-NEON-NEXT:    fadd s3, s5, s3
; CHECK-NO-NEON-NEXT:    fadd s1, s6, s1
; CHECK-NO-NEON-NEXT:    fadd s2, s7, s2
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
  %conv = uitofp <4 x i32> %shl to <4 x float>
  %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
  %res = fadd <4 x float> %mul, %add
  ret <4 x float> %res
}

; The FP multiplier is a non-splat vector <15.0, 14.0>. With NEON the recorded
; output loads it from a constant pool; the multiply is not folded. The "todo"
; suffix in the name marks this as a case not handled yet.
define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NEON-NEXT:    dup v1.2d, x8
; CHECK-NEON-NEXT:    adrp x8, .LCPI12_0
; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
; CHECK-NEON-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
; CHECK-NO-NEON-NEXT:    fmov d3, #14.00000000
; CHECK-NO-NEON-NEXT:    lsl x9, x8, x0
; CHECK-NO-NEON-NEXT:    lsl x8, x8, x1
; CHECK-NO-NEON-NEXT:    ucvtf d1, x8
; CHECK-NO-NEON-NEXT:    ucvtf d0, x9
; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
; CHECK-NO-NEON-NEXT:    fmul d1, d1, d3
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x double>
  %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
  ret <2 x double> %mul
}

; The shifted base is a non-splat vector <2, 1> while the FP multiplier is a
; splat. With NEON the recorded output loads the base from a constant pool; the
; multiply is not folded. Also marked "todo" by its name.
define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEON-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
; CHECK-NEON-NEXT:    ushl v0.2d, v1.2d, v0.2d
; CHECK-NEON-NEXT:    fmov v1.2d, #15.00000000
; CHECK-NEON-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEON-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #2 // =0x2
; CHECK-NO-NEON-NEXT:    mov w9, #1 // =0x1
; CHECK-NO-NEON-NEXT:    fmov d2, #15.00000000
; CHECK-NO-NEON-NEXT:    lsl x8, x8, x0
; CHECK-NO-NEON-NEXT:    lsl x9, x9, x1
; CHECK-NO-NEON-NEXT:    ucvtf d1, x9
; CHECK-NO-NEON-NEXT:    ucvtf d0, x8
; CHECK-NO-NEON-NEXT:    fmul d0, d0, d2
; CHECK-NO-NEON-NEXT:    fmul d1, d1, d2
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x double>
  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
  ret <2 x double> %mul
}


; Negative case with a very large multiplier (9.745314e+288) and a 64-bit shift
; count. The recorded output keeps the conversion and the fmul.
; NOTE(review): presumably rejected because adding up to 63 to the exponent of
; this constant could leave the representable range - confirm.
define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    ucvtf d0, x8
; CHECK-NEXT:    adrp x8, .LCPI14_0
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl = shl nuw i64 1, %cnt
  %conv = uitofp i64 %shl to double
  %mul = fmul double 9.745314e+288, %conv
  ret double %mul
}

; Same multiplier as the previous test but the shift is only i16 wide. Despite
; the "safe" name, the recorded output for this target still contains the
; shift, mask, ucvtf and fmul.
define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
; CHECK-LABEL: fmul_pow_shl_cnt_safe:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    lsl w8, w8, w0
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    ucvtf d0, w8
; CHECK-NEXT:    adrp x8, .LCPI15_0
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %shl = shl nuw i16 1, %cnt
  %conv = uitofp i16 %shl to double
  %mul = fmul double 9.745314e+288, %conv
  ret double %mul
}

; <2 x double> 1.0 / uitofp(1 << %cnt). The recorded output contains no divide:
; the count is shifted left by 52 and subtracted from the bit pattern of 1.0
; (0x3ff0000000000000 in the configuration without NEON).
define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    fmov v1.2d, #1.00000000
; CHECK-NEON-NEXT:    shl v0.2d, v0.2d, #52
; CHECK-NEON-NEXT:    sub v0.2d, v1.2d, v0.2d
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NO-NEON-NEXT:    sub x9, x8, x0, lsl #52
; CHECK-NO-NEON-NEXT:    sub x8, x8, x1, lsl #52
; CHECK-NO-NEON-NEXT:    fmov d0, x9
; CHECK-NO-NEON-NEXT:    fmov d1, x8
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x double>
  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
  ret <2 x double> %mul
}

; Divide counterpart of the "expensive cast" multiply test: <2 x i64> shift
; converted to <2 x float>. Here the recorded output is folded; with NEON the
; count is narrowed to 32 bits (xtn), shifted left by 23 and subtracted from the
; bit pattern of 1.0f.
define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
; CHECK-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
; CHECK-NEON:       // %bb.0:
; CHECK-NEON-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEON-NEXT:    fmov v1.2s, #1.00000000
; CHECK-NEON-NEXT:    shl v0.2s, v0.2s, #23
; CHECK-NEON-NEXT:    sub v0.2s, v1.2s, v0.2s
; CHECK-NEON-NEXT:    ret
;
; CHECK-NO-NEON-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
; CHECK-NO-NEON:       // %bb.0:
; CHECK-NO-NEON-NEXT:    mov w8, #1065353216 // =0x3f800000
; CHECK-NO-NEON-NEXT:    sub w9, w8, w0, lsl #23
; CHECK-NO-NEON-NEXT:    sub w8, w8, w1, lsl #23
; CHECK-NO-NEON-NEXT:    fmov s0, w9
; CHECK-NO-NEON-NEXT:    fmov s1, w8
; CHECK-NO-NEON-NEXT:    ret
  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
  %conv = uitofp <2 x i64> %shl to <2 x float>
  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
  ret <2 x float> %mul
}

; Negative case: the shift of 8 carries no nuw flag. The recorded output keeps
; lsl, ucvtf and fdiv.
; NOTE(review): presumably rejected because the shift result may be zero when
; bits are shifted out - confirm.
define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #8 // =0x8
; CHECK-NEXT:    fmov s1, #-9.00000000
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fdiv s0, s1, s0
; CHECK-NEXT:    ret
  %shl = shl i64 8, %cnt
  %conv = uitofp i64 %shl to float
  %mul = fdiv float -9.000000e+00, %conv
  ret float %mul
}

; Negative case: same unflagged shift, but converted with a signed conversion
; (sitofp). The recorded output keeps lsl, scvtf and fdiv.
define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #8 // =0x8
; CHECK-NEXT:    fmov s1, #-9.00000000
; CHECK-NEXT:    lsl x8, x8, x0
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fdiv s0, s1, s0
; CHECK-NEXT:    ret
  %shl = shl i64 8, %cnt
  %conv = sitofp i64 %shl to float
  %mul = fdiv float -9.000000e+00, %conv
  ret float %mul
}

; Positive counterpart of the two tests above: the count is first masked with
; 31, then 8 is shifted, converted with sitofp and used to divide -0.5. The
; recorded output is folded to a subtract of ((cnt & 0x1f) << 23) from the
; constant 0xbd800000.
define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #-1115684864 // =0xbd800000
; CHECK-NEXT:    and w9, w0, #0x1f
; CHECK-NEXT:    sub w8, w8, w9, lsl #23
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %cnt = and i64 %cnt_in, 31
  %shl = shl i64 8, %cnt
  %conv = sitofp i64 %shl to float
  %mul = fdiv float -0.500000e+00, %conv
  ret float %mul
}

; i32 shift converted to double, dividing the small constant given as the hex
; literal 0x36A0000000000000. The recorded output is folded to a 64-bit
; subtract of (cnt << 52) from that same bit pattern.
define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sub x8, x8, x0, lsl #52
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to double
  %mul = fdiv double 0x36A0000000000000, %conv
  ret double %mul
}

; Negative case: float dividend given as the hex literal 0x3a1fffff00000000,
; slightly below the constant used in fdiv_pow_shl_cnt32_okay. The recorded
; output keeps lsl, ucvtf and fdiv.
; NOTE(review): the name suggests the result could fall outside the range the
; fold allows - confirm the exact bound.
define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    lsl w8, w8, w0
; CHECK-NEXT:    ucvtf s0, w8
; CHECK-NEXT:    mov w8, #65528 // =0xfff8
; CHECK-NEXT:    movk w8, #4351, lsl #16
; CHECK-NEXT:    fmov s1, w8
; CHECK-NEXT:    fdiv s0, s1, s0
; CHECK-NEXT:    ret
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to float
  %mul = fdiv float 0x3a1fffff00000000, %conv
  ret float %mul
}

; Float dividend given as the hex literal 0x3a20000000000000. The recorded
; output is folded to a subtract of (cnt << 23) from the constant 0x11000000.
define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #285212672 // =0x11000000
; CHECK-NEXT:    sub w8, w8, w0, lsl #23
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %shl = shl nuw i32 1, %cnt
  %conv = uitofp i32 %shl to float
  %mul = fdiv float 0x3a20000000000000, %conv
  ret float %mul
}

; Longer chain: zext i32 -> shl i64 1 -> uitofp double -> 1.0 / x -> fptrunc to
; float -> compare greater than 0.0. In the recorded output only the divide is
; folded (subtract of cnt << 52 from the bits of 1.0); the fcvt, fcmp and cset
; remain.
define fastcc i1 @quantum_hadamard(i32 %0) {
; CHECK-LABEL: quantum_hadamard:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sub x8, x8, x0, lsl #52
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    fcvt s0, d0
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    cset w0, gt
; CHECK-NEXT:    ret
  %2 = zext i32 %0 to i64
  %3 = shl i64 1, %2
  %4 = uitofp i64 %3 to double
  %5 = fdiv double 1.000000e+00, %4
  %6 = fptrunc double %5 to float
  %7 = fcmp olt float 0.000000e+00, %6
  ret i1 %7
}

; Scalable-vector version of fdiv_pow2_4xfloat; SVE is enabled through the
; function attribute, so both llc invocations produce the same output. The
; recorded output keeps the shift (lslr), ucvtf and fdivr.
define <vscale x 4 x float> @fdiv_pow2_nx4xfloat(<vscale x 4 x i32> %i) "target-features"="+sve" {
; CHECK-LABEL: fdiv_pow2_nx4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.s, #1 // =0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lslr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    fmov z1.s, #9.00000000
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p2 = shl <vscale x 4 x i32> splat (i32 1), %i
  %p2_f = uitofp <vscale x 4 x i32> %p2 to <vscale x 4 x float>
  %r = fdiv <vscale x 4 x float> splat (float 9.000000e+00), %p2_f
  ret <vscale x 4 x float> %r
}

; Scalable-vector 1.0 / uitofp(x) where x is the raw argument, with no shift at
; all. The recorded output is a plain ucvtf followed by fdivr.
define <vscale x 2 x double> @scalable2(<vscale x 2 x i64> %0) "target-features"="+sve" {
; CHECK-LABEL: scalable2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmov z1.d, #1.00000000
; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %2 = uitofp <vscale x 2 x i64> %0 to <vscale x 2 x double>
  %3 = fdiv <vscale x 2 x double> splat (double 1.000000e+00), %2
  ret <vscale x 2 x double> %3
}