; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s

; Test signed conversion.
; sitofp on <2 x i32> followed by a divide by the splat constant 8.0. The
; expected output is a single fixed-point vcvt.f32.s32 with #3 and no
; separate divide instruction.
define arm_aapcs_vfpcc <2 x float> @t1(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
  ret <2 x float> %div.i
}

; Test unsigned conversion.
; Same shape as t1 but with uitofp; the expected output uses the unsigned
; fixed-point form vcvt.f32.u32 with #3.
define arm_aapcs_vfpcc <2 x float> @t2(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
  ret <2 x float> %div.i
}

; Test which should not fold due to non-power of 2.
; The divisor is 6.8 (see the constant-pool entry in the expected output), so
; the expected output keeps a plain vcvt.f32.s32 followed by one scalar
; vdiv.f32 per lane.
define arm_aapcs_vfpcc <2 x float> @t3(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d2, d0
; CHECK-NEXT: vldr s2, LCPI2_0
; CHECK-NEXT: vdiv.f32 s1, s5, s2
; CHECK-NEXT: vdiv.f32 s0, s4, s2
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .data_region
; CHECK-NEXT: LCPI2_0:
; CHECK-NEXT: .long 0x40d9999a @ float 6.80000019
; CHECK-NEXT: .end_data_region
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000>
  ret <2 x float> %div.i
}

; Test which should not fold due to power of 2 out of range.
; The divisor 0x4200000000000000 is 2^33. The expected output keeps a plain
; vcvt.f32.s32 followed by a vmul.f32 by the immediate #0x2f000000 (the f32
; bit pattern of 2^-33) instead of a fixed-point vcvt.
define arm_aapcs_vfpcc <2 x float> @t4(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d16, d0
; CHECK-NEXT: vmov.i32 d17, #0x2f000000
; CHECK-NEXT: vmul.f32 d0, d16, d17
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
  ret <2 x float> %div.i
}

; Test case where const is max power of 2 (i.e., 2^32).
; The expected output is a fixed-point vcvt.f32.s32 with #32.
define arm_aapcs_vfpcc <2 x float> @t5(<2 x i32> %vecinit2.i) nounwind {
; CHECK-LABEL: t5:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
  ret <2 x float> %div.i
}

; Test quadword.
; <4 x i32> divided by the splat constant 8.0; the expected output is the
; q-register fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <4 x float> @t6(<4 x i32> %vecinit6.i) nounwind {
; CHECK-LABEL: t6:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
  %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
  ret <4 x float> %div.i
}

; Unsigned <4 x i16> source divided by 2.0. The expected output first widens
; the input with vmovl.u16, then uses a fixed-point vcvt.f32.u32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_unsigned_i16_to_float:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d0
; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1
; CHECK-NEXT: bx lr
  %conv = uitofp <4 x i16> %in to <4 x float>
  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %shift
}

; Signed <4 x i16> source divided by 2.0. The expected output first widens
; the input with vmovl.s16, then uses a fixed-point vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
; CHECK-LABEL: fix_signed_i16_to_float:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.s16 q8, d0
; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1
; CHECK-NEXT: bx lr
  %conv = sitofp <4 x i16> %in to <4 x float>
  %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
  ret <4 x float> %shift
}

; Unsigned <2 x i64> source converted to <2 x float> and divided by 2.0. The
; expected output has no fixed-point vcvt: each element is converted through
; a call to ___floatundisf and the result is scaled with a vmul.f32 by
; #0x3f000000 (the f32 bit pattern of 0.5).
define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float(<2 x i64> %in) {
; CHECK-LABEL: fix_i64_to_float:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ___floatundisf
; CHECK-NEXT: vmov r2, r1, d8
; CHECK-NEXT: vmov s19, r0
; CHECK-NEXT: vmov.i32 d8, #0x3f000000
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl ___floatundisf
; CHECK-NEXT: vmov s18, r0
; CHECK-NEXT: vmul.f32 d0, d9, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
  %conv = uitofp <2 x i64> %in to <2 x float>
  %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
  ret <2 x float> %shift
}

; Unsigned <2 x i64> source converted to <2 x double> and divided by 2.0. The
; expected output converts each element through a call to ___floatundidf and
; scales each result with a vmul.f64 by 0.5.
define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double(<2 x i64> %in) {
; CHECK-LABEL: fix_i64_to_double:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vmul.f64 d1, d9, d8
; CHECK-NEXT: vmul.f64 d0, d16, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
  %conv = uitofp <2 x i64> %in to <2 x double>
  %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
  ret <2 x double> %shift
}

; Test with 8 lanes. The expected output below shows the operation handled as
; two quadword halves, each using the fixed-point vcvt form with #3. Just make
; sure things don't crash.
define arm_aapcs_vfpcc <8 x float> @test7(<8 x i32> %in) nounwind {
; CHECK-LABEL: test7:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <8 x i32> %in to <8 x float>
  %div.i = fdiv <8 x float> %vcvt.i, <float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0, float 8.0>
  ret <8 x float> %div.i
}

; Can combine splat with an undef.
; The last lane of the divisor is undef while the others are 2.0; the expected
; output is still a fixed-point vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @test8(<4 x i32> %in) {
; CHECK-LABEL: test8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #1
; CHECK-NEXT: bx lr
  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
  %div.i = fdiv <4 x float> %vcvt.i, <float 2.0, float 2.0, float 2.0, float undef>
  ret <4 x float> %div.i
}

; 3-element vector (presumably the "illegal" type the function name refers to)
; divided by 4.0. The expected output is a quadword fixed-point vcvt.f32.s32
; with #2.
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp(<3 x i32> %in) {
; CHECK-LABEL: test_illegal_int_to_fp:
; CHECK: @ %bb.0:
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2
; CHECK-NEXT: bx lr
  %conv = sitofp <3 x i32> %in to <3 x float>
  %res = fdiv <3 x float> %conv, <float 4.0, float 4.0, float 4.0>
  ret <3 x float> %res
}


; The *_mul tests express the scaling as an fmul by a fractional constant
; instead of an fdiv.
; NOTE(review): these functions reference attribute group #0, whose definition
; is not visible in this part of the file - confirm it is intentional.

; Signed conversion multiplied by the splat constant 0.125; the expected
; output is a fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <2 x float> @t1_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t1_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01>
  ret <2 x float> %div.i
}

; Unsigned conversion multiplied by the splat constant 0.125; the expected
; output is a fixed-point vcvt.f32.u32 with #3.
define arm_aapcs_vfpcc <2 x float> @t2_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t2_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.u32 d0, d0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01>
  ret <2 x float> %div.i
}

; Signed conversion multiplied by 0x3DE0000000000000 (2^-33). The expected
; output keeps a plain vcvt.f32.s32 followed by a vmul.f32 by the immediate
; #0x2f000000 instead of a fixed-point vcvt.
define arm_aapcs_vfpcc <2 x float> @t4_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t4_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d16, d0
; CHECK-NEXT: vmov.i32 d17, #0x2f000000
; CHECK-NEXT: vmul.f32 d0, d16, d17
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 0x3DE0000000000000, float 0x3DE0000000000000>
  ret <2 x float> %div.i
}

; Signed conversion multiplied by 0x3DF0000000000000 (2^-32). The expected
; output is a fixed-point vcvt.f32.s32 with #32.
define arm_aapcs_vfpcc <2 x float> @t5_mul(<2 x i32> %vecinit2.i) local_unnamed_addr #0 {
; CHECK-LABEL: t5_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 d0, d0, #32
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
  %div.i = fmul <2 x float> %vcvt.i, <float 0x3DF0000000000000, float 0x3DF0000000000000>
  ret <2 x float> %div.i
}

; Quadword signed conversion multiplied by the splat constant 0.125; the
; expected output is the q-register fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <4 x float> @t6_mul(<4 x i32> %vecinit6.i) local_unnamed_addr #0 {
; CHECK-LABEL: t6_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
  %div.i = fmul <4 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01>
  ret <4 x float> %div.i
}

; Unsigned <4 x i16> source multiplied by 0.5. The expected output widens the
; input with vmovl.u16, then uses a fixed-point vcvt.f32.u32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_unsigned_i16_to_float_mul(<4 x i16> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_unsigned_i16_to_float_mul:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.u16 q8, d0
; CHECK-NEXT: vcvt.f32.u32 q0, q8, #1
; CHECK-NEXT: bx lr
  %conv = uitofp <4 x i16> %in to <4 x float>
  %shift = fmul <4 x float> %conv, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
  ret <4 x float> %shift
}

; Signed <4 x i16> source multiplied by 0.5. The expected output widens the
; input with vmovl.s16, then uses a fixed-point vcvt.f32.s32 with #1.
define arm_aapcs_vfpcc <4 x float> @fix_signed_i16_to_float_mul(<4 x i16> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_signed_i16_to_float_mul:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmovl.s16 q8, d0
; CHECK-NEXT: vcvt.f32.s32 q0, q8, #1
; CHECK-NEXT: bx lr
  %conv = sitofp <4 x i16> %in to <4 x float>
  %shift = fmul <4 x float> %conv, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
  ret <4 x float> %shift
}

; Unsigned <2 x i64> source converted to <2 x float> and multiplied by 0.5.
; The expected output has no fixed-point vcvt: each element is converted
; through a call to ___floatundisf and the result is scaled with a vmul.f32
; by #0x3f000000 (the f32 bit pattern of 0.5).
define arm_aapcs_vfpcc <2 x float> @fix_i64_to_float_mul(<2 x i64> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_i64_to_float_mul:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ___floatundisf
; CHECK-NEXT: vmov r2, r1, d8
; CHECK-NEXT: vmov s19, r0
; CHECK-NEXT: vmov.i32 d8, #0x3f000000
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: bl ___floatundisf
; CHECK-NEXT: vmov s18, r0
; CHECK-NEXT: vmul.f32 d0, d9, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
  %conv = uitofp <2 x i64> %in to <2 x float>
  %shift = fmul <2 x float> %conv, <float 5.000000e-01, float 5.000000e-01>
  ret <2 x float> %shift
}

; Unsigned <2 x i64> source converted to <2 x double> and multiplied by 0.5.
; The expected output converts each element through a call to ___floatundidf
; and scales each result with a vmul.f64 by 0.5.
define arm_aapcs_vfpcc <2 x double> @fix_i64_to_double_mul(<2 x i64> %in) local_unnamed_addr #0 {
; CHECK-LABEL: fix_i64_to_double_mul:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {lr}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vorr q4, q0, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov r2, r3, d8
; CHECK-NEXT: vmov d9, r0, r1
; CHECK-NEXT: vmov.f64 d8, #5.000000e-01
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl ___floatundidf
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: vmul.f64 d1, d9, d8
; CHECK-NEXT: vmul.f64 d0, d16, d8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {lr}
; CHECK-NEXT: bx lr
  %conv = uitofp <2 x i64> %in to <2 x double>
  %shift = fmul <2 x double> %conv, <double 5.000000e-01, double 5.000000e-01>
  ret <2 x double> %shift
}

; 8-lane signed conversion multiplied by the splat constant 0.125. The
; expected output handles the vector as two quadword halves, each using a
; fixed-point vcvt.f32.s32 with #3.
define arm_aapcs_vfpcc <8 x float> @test7_mul(<8 x i32> %in) local_unnamed_addr #0 {
; CHECK-LABEL: test7_mul:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #3
; CHECK-NEXT: vcvt.f32.s32 q1, q1, #3
; CHECK-NEXT: bx lr
entry:
  %vcvt.i = sitofp <8 x i32> %in to <8 x float>
  %div.i = fmul <8 x float> %vcvt.i, <float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01>
  ret <8 x float> %div.i
}

; 3-element vector (presumably the "illegal" type the function name refers to)
; multiplied by 0.25. The expected output is a quadword fixed-point
; vcvt.f32.s32 with #2.
define arm_aapcs_vfpcc <3 x float> @test_illegal_int_to_fp_mul(<3 x i32> %in) local_unnamed_addr #0 {
; CHECK-LABEL: test_illegal_int_to_fp_mul:
; CHECK: @ %bb.0:
; CHECK-NEXT: vcvt.f32.s32 q0, q0, #2
; CHECK-NEXT: bx lr
  %conv = sitofp <3 x i32> %in to <3 x float>
  %res = fmul <3 x float> %conv, <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>
  ret <3 x float> %res
}