; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECKHARD
; RUN: llc -mtriple=arm-eabi -mattr=+armv8.2-a,+fullfp16,+neon -float-abi=soft < %s | FileCheck %s --check-prefix=CHECKSOFT

; Lane extraction and insertion on <4 x half> / <8 x half> vectors, compiled
; twice: once with -float-abi=hard and once with -float-abi=soft. Each function
; carries one group of expected instructions per run.
;
; SSA value names inside the bodies are arbitrary. The block label `entry` is
; not: it is echoed in the expected `@ %entry` annotations and must be kept.

; Extract lane 1 of a <4 x half> and widen it to float.
; Expected: a single vcvtt.f32.f16 reading s0. The soft-float run first moves
; r0/r1 into d0 and afterwards moves s0 back to r0.
define float @test_vget_lane_f16_1(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %lane = extractelement <4 x half> %a, i32 1
  %wide = fpext half %lane to float
  ret float %wide
}

; Extract lane 2 of a <4 x half> and widen it to float.
; Expected: a single vcvtb.f32.f16 reading s1.
define float @test_vget_lane_f16_2(<4 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_lane_f16_2:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_lane_f16_2:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s1
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %lane = extractelement <4 x half> %a, i32 2
  %wide = fpext half %lane to float
  ret float %wide
}

; Extract lane 6 of an <8 x half> and widen it to float.
; Expected: a single vcvtb.f32.f16 reading s3. The soft-float run only
; materialises d1 (from r2/r3) before the conversion.
define float @test_vget_laneq_f16_6(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_6:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_6:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtb.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %lane = extractelement <8 x half> %a, i32 6
  %wide = fpext half %lane to float
  ret float %wide
}

; Extract lane 7 of an <8 x half> and widen it to float.
; Expected: a single vcvtt.f32.f16 reading s3.
define float @test_vget_laneq_f16_7(<8 x half> %a) nounwind {
; CHECKHARD-LABEL: test_vget_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vget_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vcvtt.f32.f16 s0, s3
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    bx lr
entry:
  %lane = extractelement <8 x half> %a, i32 7
  %wide = fpext half %lane to float
  ret float %wide
}

; Place a scalar half into lane 0 of an otherwise undefined <4 x half>.
; Expected for hard-float: no instruction besides the return (only a register
; kill annotation). Soft-float expects vmov.f16 into s0, then d0 copied out to
; r0/r1.
define <4 x half> @insert_v4f16(half %a) {
; CHECKHARD-LABEL: insert_v4f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v4f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %vec = insertelement <4 x half> undef, half %a, i32 0
  ret <4 x half> %vec
}

; Same as insert_v4f16 but for an <8 x half> result; the soft-float run copies
; both d1 and d0 out to r2/r3 and r0/r1.
define <8 x half> @insert_v8f16(half %a) {
; CHECKHARD-LABEL: insert_v8f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    @ kill: def $s0 killed $s0 def $q0
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: insert_v8f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov.f16 s0, r0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %vec = insertelement <8 x half> undef, half %a, i32 0
  ret <8 x half> %vec
}

; Narrow a float to half and insert it into lane 3 of a <4 x half>.
; Expected: a single vcvtt.f16.f32 writing s1 from s2; the soft-float run
; surrounds it with the moves between core and VFP registers.
define <4 x half> @test_vset_lane_f16(<4 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_lane_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_lane_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vmov s2, r2
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s1, s2
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %narrow = fptrunc float %fb to half
  %vec = insertelement <4 x half> %a, half %narrow, i32 3
  ret <4 x half> %vec
}

; Narrow a float to half and insert it into lane 1 of an <8 x half>.
; Expected: a single vcvtt.f16.f32 writing s0 from s4. In the soft-float run
; the float operand is loaded from [sp].
define <8 x half> @test_vset_laneq_f16_1(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_1:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_1:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s0, s4
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    bx lr
entry:
  %narrow = fptrunc float %fb to half
  %vec = insertelement <8 x half> %a, half %narrow, i32 1
  ret <8 x half> %vec
}

; Narrow a float to half and insert it into lane 7 of an <8 x half>.
; Expected: a single vcvtt.f16.f32 writing s3 from s4. Note that the two
; trailing copies back to core registers appear in the opposite order from
; test_vset_laneq_f16_1.
define <8 x half> @test_vset_laneq_f16_7(<8 x half> %a, float %fb) nounwind {
; CHECKHARD-LABEL: test_vset_laneq_f16_7:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: test_vset_laneq_f16_7:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov d1, r2, r3
; CHECKSOFT-NEXT:    vldr s4, [sp]
; CHECKSOFT-NEXT:    vmov d0, r0, r1
; CHECKSOFT-NEXT:    vcvtt.f16.f32 s3, s4
; CHECKSOFT-NEXT:    vmov r0, r1, d0
; CHECKSOFT-NEXT:    vmov r2, r3, d1
; CHECKSOFT-NEXT:    bx lr
entry:
  %narrow = fptrunc float %fb to half
  %vec = insertelement <8 x half> %a, half %narrow, i32 7
  ret <8 x half> %vec
}

; Three stride-3 shuffles of a <32 x half> (starting at elements 0, 1 and 2,
; eight elements each) summed together with two vector fadds.
; The function is declared arm_aapcs_vfpcc, and the expected instruction
; sequence is the same for the hard-float and soft-float runs.
define arm_aapcs_vfpcc <8 x half> @shuffle3step_f16(<32 x half> %src) {
; CHECKHARD-LABEL: shuffle3step_f16:
; CHECKHARD:       @ %bb.0: @ %entry
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vmovx.f16 s12, s1
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vext.16 d16, d4, d5, #2
; CHECKHARD-NEXT:    vmovx.f16 s12, s4
; CHECKHARD-NEXT:    vdup.16 q11, d3[1]
; CHECKHARD-NEXT:    vrev32.16 d17, d16
; CHECKHARD-NEXT:    vext.16 d16, d16, d17, #3
; CHECKHARD-NEXT:    vrev32.16 d17, d3
; CHECKHARD-NEXT:    vext.16 d17, d17, d3, #1
; CHECKHARD-NEXT:    vext.16 d16, d16, d17, #2
; CHECKHARD-NEXT:    vext.16 d17, d16, d16, #2
; CHECKHARD-NEXT:    vmov.16 d16[0], r1
; CHECKHARD-NEXT:    vmov.16 d16[1], r0
; CHECKHARD-NEXT:    vmov r0, s3
; CHECKHARD-NEXT:    vmov.16 d16[2], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s0
; CHECKHARD-NEXT:    vmov r1, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s3
; CHECKHARD-NEXT:    vmov.16 d16[3], r0
; CHECKHARD-NEXT:    vmov r0, s2
; CHECKHARD-NEXT:    vmov.16 d18[0], r1
; CHECKHARD-NEXT:    vmov r1, s8
; CHECKHARD-NEXT:    vmov.16 d18[1], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmovx.f16 s12, s9
; CHECKHARD-NEXT:    vmov.16 d20[1], r1
; CHECKHARD-NEXT:    vmov.16 d18[2], r0
; CHECKHARD-NEXT:    vmov r0, s5
; CHECKHARD-NEXT:    vmov.16 d18[3], r0
; CHECKHARD-NEXT:    vmov r0, s12
; CHECKHARD-NEXT:    vmov.16 d20[2], r0
; CHECKHARD-NEXT:    vmov r0, s11
; CHECKHARD-NEXT:    vmov.16 d20[3], r0
; CHECKHARD-NEXT:    vmov r0, s10
; CHECKHARD-NEXT:    vext.16 d20, d20, d22, #1
; CHECKHARD-NEXT:    vdup.16 q11, d3[2]
; CHECKHARD-NEXT:    vext.16 d19, d20, d20, #3
; CHECKHARD-NEXT:    vadd.f16 q8, q8, q9
; CHECKHARD-NEXT:    vext.16 d18, d0, d1, #2
; CHECKHARD-NEXT:    vmovx.f16 s0, s8
; CHECKHARD-NEXT:    vmov r1, s0
; CHECKHARD-NEXT:    vmovx.f16 s0, s11
; CHECKHARD-NEXT:    vext.16 d19, d18, d2, #3
; CHECKHARD-NEXT:    vext.16 d18, d2, d18, #1
; CHECKHARD-NEXT:    vext.16 d18, d18, d19, #2
; CHECKHARD-NEXT:    vext.16 d18, d18, d18, #1
; CHECKHARD-NEXT:    vmov.16 d20[1], r1
; CHECKHARD-NEXT:    vmov.16 d20[2], r0
; CHECKHARD-NEXT:    vmov r0, s0
; CHECKHARD-NEXT:    vmov.16 d20[3], r0
; CHECKHARD-NEXT:    vext.16 d20, d20, d22, #1
; CHECKHARD-NEXT:    vext.16 d19, d20, d20, #3
; CHECKHARD-NEXT:    vadd.f16 q0, q8, q9
; CHECKHARD-NEXT:    bx lr
;
; CHECKSOFT-LABEL: shuffle3step_f16:
; CHECKSOFT:       @ %bb.0: @ %entry
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vmovx.f16 s12, s1
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vext.16 d16, d4, d5, #2
; CHECKSOFT-NEXT:    vmovx.f16 s12, s4
; CHECKSOFT-NEXT:    vdup.16 q11, d3[1]
; CHECKSOFT-NEXT:    vrev32.16 d17, d16
; CHECKSOFT-NEXT:    vext.16 d16, d16, d17, #3
; CHECKSOFT-NEXT:    vrev32.16 d17, d3
; CHECKSOFT-NEXT:    vext.16 d17, d17, d3, #1
; CHECKSOFT-NEXT:    vext.16 d16, d16, d17, #2
; CHECKSOFT-NEXT:    vext.16 d17, d16, d16, #2
; CHECKSOFT-NEXT:    vmov.16 d16[0], r1
; CHECKSOFT-NEXT:    vmov.16 d16[1], r0
; CHECKSOFT-NEXT:    vmov r0, s3
; CHECKSOFT-NEXT:    vmov.16 d16[2], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s0
; CHECKSOFT-NEXT:    vmov r1, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s3
; CHECKSOFT-NEXT:    vmov.16 d16[3], r0
; CHECKSOFT-NEXT:    vmov r0, s2
; CHECKSOFT-NEXT:    vmov.16 d18[0], r1
; CHECKSOFT-NEXT:    vmov r1, s8
; CHECKSOFT-NEXT:    vmov.16 d18[1], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmovx.f16 s12, s9
; CHECKSOFT-NEXT:    vmov.16 d20[1], r1
; CHECKSOFT-NEXT:    vmov.16 d18[2], r0
; CHECKSOFT-NEXT:    vmov r0, s5
; CHECKSOFT-NEXT:    vmov.16 d18[3], r0
; CHECKSOFT-NEXT:    vmov r0, s12
; CHECKSOFT-NEXT:    vmov.16 d20[2], r0
; CHECKSOFT-NEXT:    vmov r0, s11
; CHECKSOFT-NEXT:    vmov.16 d20[3], r0
; CHECKSOFT-NEXT:    vmov r0, s10
; CHECKSOFT-NEXT:    vext.16 d20, d20, d22, #1
; CHECKSOFT-NEXT:    vdup.16 q11, d3[2]
; CHECKSOFT-NEXT:    vext.16 d19, d20, d20, #3
; CHECKSOFT-NEXT:    vadd.f16 q8, q8, q9
; CHECKSOFT-NEXT:    vext.16 d18, d0, d1, #2
; CHECKSOFT-NEXT:    vmovx.f16 s0, s8
; CHECKSOFT-NEXT:    vmov r1, s0
; CHECKSOFT-NEXT:    vmovx.f16 s0, s11
; CHECKSOFT-NEXT:    vext.16 d19, d18, d2, #3
; CHECKSOFT-NEXT:    vext.16 d18, d2, d18, #1
; CHECKSOFT-NEXT:    vext.16 d18, d18, d19, #2
; CHECKSOFT-NEXT:    vext.16 d18, d18, d18, #1
; CHECKSOFT-NEXT:    vmov.16 d20[1], r1
; CHECKSOFT-NEXT:    vmov.16 d20[2], r0
; CHECKSOFT-NEXT:    vmov r0, s0
; CHECKSOFT-NEXT:    vmov.16 d20[3], r0
; CHECKSOFT-NEXT:    vext.16 d20, d20, d22, #1
; CHECKSOFT-NEXT:    vext.16 d19, d20, d20, #3
; CHECKSOFT-NEXT:    vadd.f16 q0, q8, q9
; CHECKSOFT-NEXT:    bx lr
entry:
  %phase0 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %phase1 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
  %phase2 = shufflevector <32 x half> %src, <32 x half> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
  %partial = fadd <8 x half> %phase0, %phase1
  %total = fadd <8 x half> %partial, %phase2
  ret <8 x half> %total
}