; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

; Vector integer <-> floating-point conversion tests, compiled twice by the
; two llc invocations above: once with -mattr=+mve,+fullfp16 and once with
; -mattr=+mve.fp. Tests whose expected output differs between the two
; configurations carry one set of assertions per configuration; the 64-bit
; element tests share a single set.
;
; The assertions are autogenerated. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; sitofp <4 x i32> -> <4 x float>.
; +mve,+fullfp16 expects four scalar vcvt.f32.s32 (one per s-register lane);
; +mve.fp expects a single vector vcvt.f32.s32 q0, q0.
define arm_aapcs_vfpcc <4 x float> @foo_float_int32(<4 x i32> %src) {
; CHECK-MVE-LABEL: foo_float_int32:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.f32.s32 s3, s3
; CHECK-MVE-NEXT:    vcvt.f32.s32 s2, s2
; CHECK-MVE-NEXT:    vcvt.f32.s32 s1, s1
; CHECK-MVE-NEXT:    vcvt.f32.s32 s0, s0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_float_int32:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f32.s32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = sitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %out
}

; uitofp <4 x i32> -> <4 x float>.
; Unsigned counterpart of foo_float_int32: four scalar vcvt.f32.u32 with
; +mve,+fullfp16, one vector vcvt.f32.u32 q0, q0 with +mve.fp.
define arm_aapcs_vfpcc <4 x float> @foo_float_uint32(<4 x i32> %src) {
; CHECK-MVE-LABEL: foo_float_uint32:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.f32.u32 s3, s3
; CHECK-MVE-NEXT:    vcvt.f32.u32 s2, s2
; CHECK-MVE-NEXT:    vcvt.f32.u32 s1, s1
; CHECK-MVE-NEXT:    vcvt.f32.u32 s0, s0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_float_uint32:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f32.u32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = uitofp <4 x i32> %src to <4 x float>
  ret <4 x float> %out
}

; fptosi <4 x float> -> <4 x i32>.
; +mve,+fullfp16 expects four scalar vcvt.s32.f32 whose results are moved to
; r0/r1 and inserted back into q0 two lanes at a time; +mve.fp expects a
; single vector vcvt.s32.f32 q0, q0.
define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_int32_float:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.s32.f32 s2, s2
; CHECK-MVE-NEXT:    vcvt.s32.f32 s0, s0
; CHECK-MVE-NEXT:    vcvt.s32.f32 s4, s3
; CHECK-MVE-NEXT:    vcvt.s32.f32 s6, s1
; CHECK-MVE-NEXT:    vmov r0, s2
; CHECK-MVE-NEXT:    vmov r1, s0
; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov r1, s6
; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_int32_float:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.s32.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptosi <4 x float> %src to <4 x i32>
  ret <4 x i32> %out
}

; fptoui <4 x float> -> <4 x i32>.
; Unsigned counterpart of foo_int32_float: the same expected shape with
; vcvt.u32.f32 in both configurations.
define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_uint32_float:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvt.u32.f32 s2, s2
; CHECK-MVE-NEXT:    vcvt.u32.f32 s0, s0
; CHECK-MVE-NEXT:    vcvt.u32.f32 s4, s3
; CHECK-MVE-NEXT:    vcvt.u32.f32 s6, s1
; CHECK-MVE-NEXT:    vmov r0, s2
; CHECK-MVE-NEXT:    vmov r1, s0
; CHECK-MVE-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov r1, s6
; CHECK-MVE-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_uint32_float:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.u32.f32 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptoui <4 x float> %src to <4 x i32>
  ret <4 x i32> %out
}

; sitofp <8 x i16> -> <8 x half>.
; +mve,+fullfp16 expects each lane to be extracted with vmov.s16, converted
; with a scalar vcvt.f16.s32 and paired up with vins.f16; +mve.fp expects a
; single vector vcvt.f16.s16 q0, q0.
define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) {
; CHECK-MVE-LABEL: foo_half_int16:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmov.s16 r0, q0[0]
; CHECK-MVE-NEXT:    vmov s0, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[1]
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s0, s0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[3]
; CHECK-MVE-NEXT:    vins.f16 s0, s2
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[2]
; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[4]
; CHECK-MVE-NEXT:    vcvt.f16.s32 s1, s8
; CHECK-MVE-NEXT:    vins.f16 s1, s2
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[5]
; CHECK-MVE-NEXT:    vcvt.f16.s32 s2, s2
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[7]
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.s16 r0, q1[6]
; CHECK-MVE-NEXT:    vcvt.f16.s32 s8, s8
; CHECK-MVE-NEXT:    vmov s4, r0
; CHECK-MVE-NEXT:    vcvt.f16.s32 s3, s4
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_half_int16:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f16.s16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = sitofp <8 x i16> %src to <8 x half>
  ret <8 x half> %out
}

; uitofp <8 x i16> -> <8 x half>.
; Unsigned counterpart of foo_half_int16: lanes are extracted with vmov.u16
; and converted with vcvt.f16.u32 under +mve,+fullfp16; +mve.fp expects a
; single vector vcvt.f16.u16 q0, q0.
define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) {
; CHECK-MVE-LABEL: foo_half_uint16:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q1, q0
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[0]
; CHECK-MVE-NEXT:    vmov s0, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[1]
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s0, s0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[3]
; CHECK-MVE-NEXT:    vins.f16 s0, s2
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[2]
; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[4]
; CHECK-MVE-NEXT:    vcvt.f16.u32 s1, s8
; CHECK-MVE-NEXT:    vins.f16 s1, s2
; CHECK-MVE-NEXT:    vmov s2, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[5]
; CHECK-MVE-NEXT:    vcvt.f16.u32 s2, s2
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[7]
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vins.f16 s2, s8
; CHECK-MVE-NEXT:    vmov s8, r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[6]
; CHECK-MVE-NEXT:    vcvt.f16.u32 s8, s8
; CHECK-MVE-NEXT:    vmov s4, r0
; CHECK-MVE-NEXT:    vcvt.f16.u32 s3, s4
; CHECK-MVE-NEXT:    vins.f16 s3, s8
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_half_uint16:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.f16.u16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = uitofp <8 x i16> %src to <8 x half>
  ret <8 x half> %out
}

; fptosi <8 x half> -> <8 x i16>.
; +mve,+fullfp16 expects vmovx.f16 to pull out the odd half lanes, a scalar
; vcvt.s32.f16 per lane, and vmov.16 to insert each result into q0; +mve.fp
; expects a single vector vcvt.s16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_int16_half:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s2, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s2
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s14
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s5
; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_int16_half:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.s16.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptosi <8 x half> %src to <8 x i16>
  ret <8 x i16> %out
}

; fptoui <8 x half> -> <8 x i16>.
; The recorded +mve,+fullfp16 sequence is identical to foo_int16_half and
; uses the signed scalar vcvt.s32.f16 even though the IR operation is
; unsigned; that is what the generator captured, not a transcription slip.
; +mve.fp expects a single vector vcvt.u16.f16 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) {
; CHECK-MVE-LABEL: foo_uint16_half:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmovx.f16 s6, s2
; CHECK-MVE-NEXT:    vcvt.s32.f16 s12, s2
; CHECK-MVE-NEXT:    vmovx.f16 s2, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s0, s0
; CHECK-MVE-NEXT:    vcvt.s32.f16 s14, s2
; CHECK-MVE-NEXT:    vmov r0, s0
; CHECK-MVE-NEXT:    vmovx.f16 s4, s3
; CHECK-MVE-NEXT:    vmovx.f16 s10, s1
; CHECK-MVE-NEXT:    vcvt.s32.f16 s8, s3
; CHECK-MVE-NEXT:    vcvt.s32.f16 s5, s1
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov r0, s14
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov r0, s5
; CHECK-MVE-NEXT:    vcvt.s32.f16 s10, s10
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov r0, s10
; CHECK-MVE-NEXT:    vcvt.s32.f16 s6, s6
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov r0, s12
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov r0, s6
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov r0, s8
; CHECK-MVE-NEXT:    vcvt.s32.f16 s4, s4
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov r0, s4
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: foo_uint16_half:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvt.u16.f16 q0, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = fptoui <8 x half> %src to <8 x i16>
  ret <8 x i16> %out
}

; sitofp <2 x i64> -> <2 x double> (despite "float" in the name, the IR
; element type is double).
; Both configurations share one set of expectations: the input is copied to
; q4 (d8/d9 are saved and restored around the body) and each 64-bit lane is
; converted by its own call to __aeabi_l2d.
define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) {
; CHECK-LABEL: foo_float_int64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_l2d
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %out = sitofp <2 x i64> %src to <2 x double>
  ret <2 x double> %out
}

; uitofp <2 x i64> -> <2 x double>.
; Unsigned counterpart of foo_float_int64: the same expected shape, with
; each lane converted by a call to __aeabi_ul2d.
define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x i64> %src) {
; CHECK-LABEL: foo_float_uint64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl __aeabi_ul2d
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl __aeabi_ul2d
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %out = uitofp <2 x i64> %src to <2 x double>
  ret <2 x double> %out
}

; fptosi <2 x double> -> <2 x i64> (the IR element type is double, not
; float).
; Both configurations share one set of expectations: each lane is converted
; by a call to __aeabi_d2lz. The first call's result is parked in r4/r5
; across the second call, then both results are inserted into q0.
define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
; CHECK-LABEL: foo_int64_float:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl __aeabi_d2lz
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    mov r5, r1
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    bl __aeabi_d2lz
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %out = fptosi <2 x double> %src to <2 x i64>
  ret <2 x i64> %out
}

; fptoui <2 x double> -> <2 x i64>.
; Unsigned counterpart of foo_int64_float: the same expected shape, with
; each lane converted by a call to __aeabi_d2ulz.
define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
; CHECK-LABEL: foo_uint64_float:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    mov r5, r1
; CHECK-NEXT:    vmov r0, r1, d8
; CHECK-NEXT:    bl __aeabi_d2ulz
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %out = fptoui <2 x double> %src to <2 x i64>
  ret <2 x i64> %out
}

; Interleave two <4 x float> inputs (%src1 in the even result lanes, %src2
; in the odd ones, per the shuffle mask 0,4,1,5,2,6,3,7) and fptrunc the
; <8 x float> result to <8 x half>.
; +mve,+fullfp16 expects scalar vcvtb.f16.f32 / vcvtt.f16.f32 pairs; +mve.fp
; expects one vector vcvtb.f16.f32 from q0 followed by one vector
; vcvtt.f16.f32 from q1, both writing q0.
define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc1(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: vmovn32_trunc1:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s0
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s1
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s2
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s3
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s4
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s5
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s6
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s7
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn32_trunc1:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvtb.f16.f32 q0, q0
; CHECK-MVEFP-NEXT:    vcvtt.f16.f32 q0, q1
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = fptrunc <8 x float> %strided.vec to <8 x half>
  ret <8 x half> %out
}

; Same as vmovn32_trunc1 with the interleave order swapped (shuffle mask
; 4,0,5,1,6,2,7,3), so %src2 lands in the even result lanes and %src1 in
; the odd ones.
; +mve,+fullfp16 expects q0 to be copied to q2 before the scalar
; vcvtb/vcvtt pairs; +mve.fp expects the result to be built in q1 and then
; moved to q0.
define arm_aapcs_vfpcc <8 x half> @vmovn32_trunc2(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: vmovn32_trunc2:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s0, s4
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s1, s5
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s2, s6
; CHECK-MVE-NEXT:    vcvtb.f16.f32 s3, s7
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s0, s8
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s1, s9
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s2, s10
; CHECK-MVE-NEXT:    vcvtt.f16.f32 s3, s11
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn32_trunc2:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vcvtb.f16.f32 q1, q1
; CHECK-MVEFP-NEXT:    vcvtt.f16.f32 q1, q0
; CHECK-MVEFP-NEXT:    vmov q0, q1
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x float> %src1, <4 x float> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = fptrunc <8 x float> %strided.vec to <8 x half>
  ret <8 x half> %out
}