; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECKHARD
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft < %s | FileCheck %s --check-prefix=CHECKSOFT

; Half-precision vector element insert/extract code generation for ARM with
; +fullfp16 and +neon. Every function is compiled twice, once per float ABI
; given on the command lines above, and each build has its own set of expected
; instructions. The expected-output comments are generated; regenerate them
; with the script named in the NOTE line instead of editing them by hand.

; Extract lane 1 of a <4 x half> and extend it to float. Both builds expect a
; single vcvtt.f32.f16 reading s0; the soft-float build additionally moves
; r0/r1 into d0 beforehand and the result back into r0.
define float @test_vget_lane_f16_1(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 1
  %conv = fpext half %elt to float
  ret float %conv
}

; Extract lane 2 of a <4 x half> and extend it to float. The expected
; conversion is vcvtb.f32.f16 reading s1.
define float @test_vget_lane_f16_2(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_2:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_2:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 2
  %conv = fpext half %elt to float
  ret float %conv
}

; Extract lane 6 of an <8 x half> and extend it to float. The expected
; conversion is vcvtb.f32.f16 reading s3; the soft-float build only
; materializes d1 (from r2/r3) before converting.
define float @test_vget_laneq_f16_6(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_6:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_6:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 6
  %conv = fpext half %elt to float
  ret float %conv
}

; Extract lane 7 of an <8 x half> and extend it to float. The expected
; conversion is vcvtt.f32.f16 reading s3.
define float @test_vget_laneq_f16_7(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 7
  %conv = fpext half %elt to float
  ret float %conv
}

; Insert a scalar half into lane 0 of an undef <4 x half>. The hard-float
; build expects nothing but the return (plus a register kill annotation); the
; soft-float build expects a vmov.f16 from r0 and d0 returned through r0/r1.
define <4 x half> @insert_v4f16(half %a) {
; CHECKHARD-LABEL: insert_v4f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v4f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %res = insertelement <4 x half> undef, half %a, i32 0
  ret <4 x half> %res
}

; Insert a scalar half into lane 0 of an undef <8 x half>. Same shape as the
; <4 x half> case; the soft-float build returns d0 and d1 through r0-r3.
define <8 x half> @insert_v8f16(half %a) {
; CHECKHARD-LABEL: insert_v8f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $q0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v8f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %res = insertelement <8 x half> undef, half %a, i32 0
  ret <8 x half> %res
}

; Truncate a float to half and insert it into lane 3 of a <4 x half>. The
; expected code converts straight into place with vcvtt.f16.f32 s1, s2.
define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_lane_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_lane_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vmov s2, r2
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <4 x half> %a, half %b, i32 3
  ret <4 x half> %x
}

; Truncate a float to half and insert it into lane 1 of an <8 x half>. The
; expected conversion is vcvtt.f16.f32 s0, s4; in the soft-float build the
; float operand is loaded from [sp].
define <8 x half> @test_vset_laneq_f16_1(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <8 x half> %a, half %b, i32 1
  ret <8 x half> %x
}

; Truncate a float to half and insert it into lane 7 of an <8 x half>. The
; expected conversion is vcvtt.f16.f32 s3, s4.
define <8 x half> @test_vset_laneq_f16_7(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    bx lr
entry:
  %b = fptrunc float %fb to half
  %x = insertelement <8 x half> %a, half %b, i32 7
  ret <8 x half> %x
}

; Three stride-3 shuffles of a <32 x half> (starting at elements 0, 1 and 2)
; summed with two fadds. The function is declared arm_aapcs_vfpcc, and the
; expected instruction sequence is identical for both builds.
define arm_aapcs_vfpcc <8 x half> @shuffle3step_f16(<32 x half> %src) {
; CHECKHARD-LABEL: shuffle3step_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vmovx.f16 s12, s1
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vrev32.16 d16, d3
; CHECKHARD-NEXT:    vext.16 d17, d4, d5, #2
; CHECKHARD-NEXT:    vmovx.f16 s12, s4
; CHECKHARD-NEXT:    vext.16 d16, d16, d3, #1
; CHECKHARD-NEXT:    vext.16 d16, d17, d16, #2
; CHECKHARD-NEXT:    vext.16 d16, d16, d17, #1
; CHECKHARD-NEXT:    vext.16 d17, d16, d16, #1
; CHECKHARD-NEXT:    vmov.16 d16[0], r1
; CHECKHARD-NEXT:    vmov.16 d16[1], r0
; CHECKHARD-NEXT:    vmov r0, s3
; CHECKHARD-NEXT:    vmov.16 d16[2], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s0
; CHECKHARD-NEXT:    vmov r1, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s3
; CHECKHARD-NEXT:    vmov.16 d16[3], r0
; CHECKHARD-NEXT:    vmov r0, s2
; CHECKHARD-NEXT:    vmov.16 d18[0], r1
; CHECKHARD-NEXT:    vmov.16 d18[1], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vdup.16 q3, d3[1]
; CHECKHARD-NEXT:    vmov r1, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s9
; CHECKHARD-NEXT:    vmov.16 d18[2], r0
; CHECKHARD-NEXT:    vmov r0, s5
; CHECKHARD-NEXT:    vmov.16 d18[3], r0
; CHECKHARD-NEXT:    vmov r0, s8
; CHECKHARD-NEXT:    vmov.16 d19[0], r1
; CHECKHARD-NEXT:    vmov.16 d19[1], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmov.16 d19[2], r0
; CHECKHARD-NEXT:    vmov r0, s11
; CHECKHARD-NEXT:    vmov.16 d19[3], r0
; CHECKHARD-NEXT:    vadd.f16 q8, q8, q9
; CHECKHARD-NEXT:    vext.16 d18, d0, d1, #2
; CHECKHARD-NEXT:    vmovx.f16 s0, s8
; CHECKHARD-NEXT:    vmov r0, s0
; CHECKHARD-NEXT:    vdup.16 q0, d3[2]
; CHECKHARD-NEXT:    vext.16 d19, d18, d2, #3
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vext.16 d18, d2, d18, #1
; CHECKHARD-NEXT:    vmovx.f16 s0, s11
; CHECKHARD-NEXT:    vext.16 d18, d18, d19, #2
; CHECKHARD-NEXT:    vext.16 d18, d18, d18, #1
; CHECKHARD-NEXT:    vmov.16 d19[0], r1
; CHECKHARD-NEXT:    vmov.16 d19[1], r0
; CHECKHARD-NEXT:    vmov r0, s10
; CHECKHARD-NEXT:    vmov.16 d19[2], r0
; CHECKHARD-NEXT:    vmov r0, s0
; CHECKHARD-NEXT:    vmov.16 d19[3], r0
; CHECKHARD-NEXT:    vadd.f16 q0, q8, q9
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: shuffle3step_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vmovx.f16 s12, s1
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vrev32.16 d16, d3
; CHECKSOFT-NEXT:    vext.16 d17, d4, d5, #2
; CHECKSOFT-NEXT:    vmovx.f16 s12, s4
; CHECKSOFT-NEXT:    vext.16 d16, d16, d3, #1
; CHECKSOFT-NEXT:    vext.16 d16, d17, d16, #2
; CHECKSOFT-NEXT:    vext.16 d16, d16, d17, #1
; CHECKSOFT-NEXT:    vext.16 d17, d16, d16, #1
; CHECKSOFT-NEXT:    vmov.16 d16[0], r1
; CHECKSOFT-NEXT:    vmov.16 d16[1], r0
; CHECKSOFT-NEXT:    vmov r0, s3
; CHECKSOFT-NEXT:    vmov.16 d16[2], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s0
; CHECKSOFT-NEXT:    vmov r1, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s3
; CHECKSOFT-NEXT:    vmov.16 d16[3], r0
; CHECKSOFT-NEXT:    vmov r0, s2
; CHECKSOFT-NEXT:    vmov.16 d18[0], r1
; CHECKSOFT-NEXT:    vmov.16 d18[1], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vdup.16 q3, d3[1]
; CHECKSOFT-NEXT:    vmov r1, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s9
; CHECKSOFT-NEXT:    vmov.16 d18[2], r0
; CHECKSOFT-NEXT:    vmov r0, s5
; CHECKSOFT-NEXT:    vmov.16 d18[3], r0
; CHECKSOFT-NEXT:    vmov r0, s8
; CHECKSOFT-NEXT:    vmov.16 d19[0], r1
; CHECKSOFT-NEXT:    vmov.16 d19[1], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmov.16 d19[2], r0
; CHECKSOFT-NEXT:    vmov r0, s11
; CHECKSOFT-NEXT:    vmov.16 d19[3], r0
; CHECKSOFT-NEXT:    vadd.f16 q8, q8, q9
; CHECKSOFT-NEXT:    vext.16 d18, d0, d1, #2
; CHECKSOFT-NEXT:    vmovx.f16 s0, s8
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    vdup.16 q0, d3[2]
; CHECKSOFT-NEXT:    vext.16 d19, d18, d2, #3
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vext.16 d18, d2, d18, #1
; CHECKSOFT-NEXT:    vmovx.f16 s0, s11
; CHECKSOFT-NEXT:    vext.16 d18, d18, d19, #2
; CHECKSOFT-NEXT:    vext.16 d18, d18, d18, #1
; CHECKSOFT-NEXT:    vmov.16 d19[0], r1
; CHECKSOFT-NEXT:    vmov.16 d19[1], r0
; CHECKSOFT-NEXT:    vmov r0, s10
; CHECKSOFT-NEXT:    vmov.16 d19[2], r0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    vmov.16 d19[3], r0
; CHECKSOFT-NEXT:    vadd.f16 q0, q8, q9
; CHECKSOFT-NEXT:    bx lr
entry:
  %s1 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %s2 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  %s3 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  %a = fadd <8 x half> %s1, %s2
  %r = fadd <8 x half> %a, %s3
  ret <8 x half> %r
}

; Extract lane 0 of a <4 x half> and bitcast it to i16. The expected code is a
; single vmov.u16 lane read into r0.
define i16 @extract_v4i16(<4 x half> %a) {
; CHECKHARD-LABEL: extract_v4i16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: extract_v4i16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d16, r0, r1
; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 0
  %t = bitcast half %elt to i16
  ret i16 %t
}

; Extract lane 0 of an <8 x half> and bitcast it to i16. The expected code
; matches the <4 x half> case.
define i16 @extract_v8i16(<8 x half> %a) {
; CHECKHARD-LABEL: extract_v8i16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: extract_v8i16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d16, r0, r1
; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 0
  %t = bitcast half %elt to i16
  ret i16 %t
}

; Extract lane 0 of a <4 x half>, bitcast it to i16 and sign-extend to i32.
; The expected code is a vmov.u16 lane read followed by sxth.
define i32 @extract_v4s32(<4 x half> %a) {
; CHECKHARD-LABEL: extract_v4s32:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
; CHECKHARD-NEXT:    sxth r0, r0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: extract_v4s32:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d16, r0, r1
; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
; CHECKSOFT-NEXT:    sxth r0, r0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <4 x half> %a, i32 0
  %t = bitcast half %elt to i16
  %s = sext i16 %t to i32
  ret i32 %s
}

; Extract lane 0 of an <8 x half>, bitcast it to i16 and sign-extend to i32.
; The expected code matches the <4 x half> case.
define i32 @extract_v8s32(<8 x half> %a) {
; CHECKHARD-LABEL: extract_v8s32:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov.u16 r0, d0[0]
; CHECKHARD-NEXT:    sxth r0, r0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: extract_v8s32:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d16, r0, r1
; CHECKSOFT-NEXT:    vmov.u16 r0, d16[0]
; CHECKSOFT-NEXT:    sxth r0, r0
; CHECKSOFT-NEXT:    bx lr
entry:
  %elt = extractelement <8 x half> %a, i32 0
  %t = bitcast half %elt to i16
  %s = sext i16 %t to i32
  ret i32 %s
}