; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
; PR11829
;
; Regression test for PR11829. The run line only feeds this module to llc for
; cortex-a8 with -verify-machineinstrs; there are no output-matching
; directives, so the test passes when llc exits successfully.
;
; The body of @foo is a reduced reproducer made mostly of undef operands.
; Keep the instruction order, operands, and value names exactly as they are.
;
; NOTE(review): call sites reference attribute group #3, but no
; `attributes #3 = { ... }` definition is visible in this file view --
; confirm whether it is defined elsewhere or intentionally left dangling.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
target triple = "armv7-none-linux-gnueabi"

define arm_aapcs_vfpcc void @foo(ptr nocapture %arg) nounwind uwtable align 2 {
bb:
  br i1 undef, label %bb1, label %bb2

bb1:                                              ; preds = %bb
  unreachable

bb2:                                              ; preds = %bb
  br label %bb3

; Loop header: bb4 branches back here, bb67 is the exit.
bb3:                                              ; preds = %bb4, %bb2
  %tmp = icmp slt i32 undef, undef
  br i1 %tmp, label %bb4, label %bb67

bb4:                                              ; preds = %bb3
  ; Keep the low 23 bits of each lane (8388607 = 0x7FFFFF) and OR in
  ; 1065353216 = 0x3F800000, the bit pattern of float 1.0.
  %tmp5 = load <4 x i32>, ptr undef, align 16
  %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
  %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
  %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
  ; Split the splat-of-1.0 vector into its high and low i64 halves through
  ; i128 and OR them back together into a <4 x float>.
  %constexpr = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  %constexpr1 = lshr i128 %constexpr, 64
  %constexpr2 = trunc i128 %constexpr1 to i64
  %constexpr3 = zext i64 %constexpr2 to i128
  %constexpr4 = shl i128 %constexpr3, 64
  %constexpr5 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  %constexpr6 = trunc i128 %constexpr5 to i64
  %constexpr7 = zext i64 %constexpr6 to i128
  %or = or i128 %constexpr4, %constexpr7
  %bc = bitcast i128 %or to <4 x float>
  %tmp9 = fsub <4 x float> %tmp8, %bc
  %tmp10 = fmul <4 x float> undef, %tmp9
  %tmp11 = fadd <4 x float> undef, %tmp10
  ; %tmp15 carries the high 64 bits of a zero vector in element 1; it is only
  ; consumed by the @quux call at the end of the block.
  %tmp12 = bitcast <4 x float> zeroinitializer to i128
  %tmp13 = lshr i128 %tmp12, 64
  %tmp14 = trunc i128 %tmp13 to i64
  %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
  ; vrecpe on %tmp11, followed by two vrecps + fmul steps.
  %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) #3
  %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) #3
  %tmp18 = fmul <4 x float> %tmp17, %tmp16
  %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) #3
  %tmp20 = fmul <4 x float> %tmp19, %tmp18
  %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
  %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) #3
  call arm_aapcs_vfpcc void @bar(ptr null, ptr undef, ptr undef, [2 x i64] zeroinitializer) #3
  ; %tmp26 = { low 64 bits of %tmp22, 0 }; passed to both @baz and @quux below.
  %tmp23 = bitcast <4 x float> %tmp22 to i128
  %tmp24 = trunc i128 %tmp23 to i64
  %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
  %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
  %tmp27 = load float, ptr undef, align 4
  %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
  ; Second copy of the mask / OR / subtract-1.0 sequence, producing %tmp33.
  %tmp29 = load <4 x i32>, ptr undef, align 16
  %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
  %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
  %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
  %constexpr8 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  %constexpr9 = lshr i128 %constexpr8, 64
  %constexpr10 = trunc i128 %constexpr9 to i64
  %constexpr11 = zext i64 %constexpr10 to i128
  %constexpr12 = shl i128 %constexpr11, 64
  %constexpr13 = bitcast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128
  %constexpr14 = trunc i128 %constexpr13 to i64
  %constexpr15 = zext i64 %constexpr14 to i128
  %or2 = or i128 %constexpr12, %constexpr15
  %bc2 = bitcast i128 %or2 to <4 x float>
  %tmp33 = fsub <4 x float> %tmp32, %bc2
  %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) #3
  %tmp35 = fmul <4 x float> %tmp34, undef
  %tmp36 = fmul <4 x float> %tmp35, undef
  ; Indirect calls through undef/null callees; several results are unused.
  %tmp37 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3
  %tmp38 = load float, ptr undef, align 4
  %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
  %tmp40 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3
  %tmp41 = load float, ptr undef, align 4
  %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
  ; Broadcast lane 0 of %tmp39 to all four lanes.
  %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
  %tmp44 = fmul <4 x float> %tmp33, %tmp43
  %tmp45 = fadd <4 x float> %tmp42, %tmp44
  %tmp46 = fsub <4 x float> %tmp45, undef
  %tmp47 = fmul <4 x float> %tmp46, %tmp36
  %tmp48 = fadd <4 x float> undef, %tmp47
  %tmp49 = call arm_aapcs_vfpcc ptr undef(ptr undef) #3
  %tmp50 = load float, ptr undef, align 4
  %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
  %tmp52 = call arm_aapcs_vfpcc ptr null(ptr undef) #3
  %tmp54 = load float, ptr %tmp52, align 4
  %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
  %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
  %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) #3
  %tmp58 = fmul <4 x float> undef, %tmp57
  %tmp59 = fsub <4 x float> %tmp51, %tmp48
  %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
  %tmp61 = fmul <4 x float> %tmp59, %tmp60
  %tmp62 = fadd <4 x float> %tmp48, %tmp61
  call arm_aapcs_vfpcc void @baz(ptr undef, ptr undef, [2 x i64] %tmp26, ptr undef)
  ; %tmp66 = { 0, high 64 bits of %tmp62 }.
  %tmp63 = bitcast <4 x float> %tmp62 to i128
  %tmp64 = lshr i128 %tmp63, 64
  %tmp65 = trunc i128 %tmp64 to i64
  %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
  call arm_aapcs_vfpcc void @quux(ptr undef, ptr undef, [2 x i64] undef, ptr undef, [2 x i64] %tmp66, ptr undef, ptr undef, [2 x i64] %tmp26, [2 x i64] %tmp15, ptr undef)
  br label %bb3

bb67:                                             ; preds = %bb3
  ret void
}

; External callees used by @foo; only their signatures matter here.
declare arm_aapcs_vfpcc void @bar(ptr, ptr, ptr, [2 x i64])

declare arm_aapcs_vfpcc void @baz(ptr, ptr nocapture, [2 x i64], ptr nocapture) nounwind uwtable inlinehint align 2

declare arm_aapcs_vfpcc void @quux(ptr, ptr, [2 x i64], ptr nocapture, [2 x i64], ptr nocapture, ptr nocapture, [2 x i64], [2 x i64], ptr nocapture) nounwind uwtable inlinehint align 2

; ARM NEON intrinsics called from @foo.
declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone

declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone

declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone