1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +v8.2a -target-feature +neon -target-feature +fp16fml \ 3*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s 4*207e5cccSFangrui Song 5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 6*207e5cccSFangrui Song 7*207e5cccSFangrui Song // Test AArch64 Armv8.2-A FP16 Fused Multiply-Add Long intrinsics 8*207e5cccSFangrui Song 9*207e5cccSFangrui Song #include <arm_neon.h> 10*207e5cccSFangrui Song 11*207e5cccSFangrui Song // Vector form 12*207e5cccSFangrui Song 13*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_low_f16( 14*207e5cccSFangrui Song // CHECK-NEXT: entry: 15*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 16*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 17*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8> 18*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]]) 19*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]] 20*207e5cccSFangrui Song // 21*207e5cccSFangrui Song float32x2_t test_vfmlal_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 22*207e5cccSFangrui Song return vfmlal_low_f16(a, b, c); 23*207e5cccSFangrui Song } 24*207e5cccSFangrui Song 25*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_low_f16( 26*207e5cccSFangrui Song // CHECK-NEXT: entry: 27*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 28*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 29*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x 
i8> 30*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]]) 31*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]] 32*207e5cccSFangrui Song // 33*207e5cccSFangrui Song float32x2_t test_vfmlsl_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 34*207e5cccSFangrui Song return vfmlsl_low_f16(a, b, c); 35*207e5cccSFangrui Song } 36*207e5cccSFangrui Song 37*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_high_f16( 38*207e5cccSFangrui Song // CHECK-NEXT: entry: 39*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 40*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 41*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8> 42*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]]) 43*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]] 44*207e5cccSFangrui Song // 45*207e5cccSFangrui Song float32x2_t test_vfmlal_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 46*207e5cccSFangrui Song return vfmlal_high_f16(a, b, c); 47*207e5cccSFangrui Song } 48*207e5cccSFangrui Song 49*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_high_f16( 50*207e5cccSFangrui Song // CHECK-NEXT: entry: 51*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 52*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 53*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C:%.*]] to <8 x i8> 54*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[C]]) 55*207e5cccSFangrui Song // CHECK-NEXT: 
ret <2 x float> [[VFMLSL_HIGH3_I]] 56*207e5cccSFangrui Song // 57*207e5cccSFangrui Song float32x2_t test_vfmlsl_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 58*207e5cccSFangrui Song return vfmlsl_high_f16(a, b, c); 59*207e5cccSFangrui Song } 60*207e5cccSFangrui Song 61*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_low_f16( 62*207e5cccSFangrui Song // CHECK-NEXT: entry: 63*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 64*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 65*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8> 66*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]]) 67*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]] 68*207e5cccSFangrui Song // 69*207e5cccSFangrui Song float32x4_t test_vfmlalq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 70*207e5cccSFangrui Song return vfmlalq_low_f16(a, b, c); 71*207e5cccSFangrui Song } 72*207e5cccSFangrui Song 73*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_low_f16( 74*207e5cccSFangrui Song // CHECK-NEXT: entry: 75*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 76*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 77*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8> 78*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]]) 79*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]] 80*207e5cccSFangrui Song // 81*207e5cccSFangrui Song float32x4_t test_vfmlslq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 82*207e5cccSFangrui Song return 
vfmlslq_low_f16(a, b, c); 83*207e5cccSFangrui Song } 84*207e5cccSFangrui Song 85*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_high_f16( 86*207e5cccSFangrui Song // CHECK-NEXT: entry: 87*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 88*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 89*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8> 90*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]]) 91*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]] 92*207e5cccSFangrui Song // 93*207e5cccSFangrui Song float32x4_t test_vfmlalq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 94*207e5cccSFangrui Song return vfmlalq_high_f16(a, b, c); 95*207e5cccSFangrui Song } 96*207e5cccSFangrui Song 97*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_high_f16( 98*207e5cccSFangrui Song // CHECK-NEXT: entry: 99*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 100*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 101*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C:%.*]] to <16 x i8> 102*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[C]]) 103*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]] 104*207e5cccSFangrui Song // 105*207e5cccSFangrui Song float32x4_t test_vfmlslq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 106*207e5cccSFangrui Song return vfmlslq_high_f16(a, b, c); 107*207e5cccSFangrui Song } 108*207e5cccSFangrui Song 109*207e5cccSFangrui Song // Indexed form 110*207e5cccSFangrui Song 111*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vfmlal_lane_low_f16( 112*207e5cccSFangrui Song // CHECK-NEXT: entry: 113*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 114*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 115*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 116*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 117*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 118*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 119*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 120*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 121*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 122*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 123*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0 124*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 125*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 126*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 127*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 128*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 129*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0 130*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 131*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 132*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], 
i32 1 133*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 134*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 135*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0 136*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 137*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 138*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 139*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 140*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 141*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0 142*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 143*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 144*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 145*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 146*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 147*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 148*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 149*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]] 150*207e5cccSFangrui Song // 151*207e5cccSFangrui Song float32x2_t test_vfmlal_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 152*207e5cccSFangrui Song return 
vfmlal_lane_low_f16(a, b, c, 0); 153*207e5cccSFangrui Song } 154*207e5cccSFangrui Song 155*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_lane_high_f16( 156*207e5cccSFangrui Song // CHECK-NEXT: entry: 157*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 158*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 159*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 160*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 161*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 162*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 163*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 164*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 165*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 166*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 167*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1 168*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 169*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 170*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 171*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 172*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 173*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1 174*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 175*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr 
[[__REINT1_8475]], align 2 176*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 177*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 178*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 179*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1 180*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 181*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 182*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 183*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 184*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 185*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1 186*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 187*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 188*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 189*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 190*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 191*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 192*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 193*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]] 194*207e5cccSFangrui Song // 
195*207e5cccSFangrui Song float32x2_t test_vfmlal_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 196*207e5cccSFangrui Song return vfmlal_lane_high_f16(a, b, c, 1); 197*207e5cccSFangrui Song } 198*207e5cccSFangrui Song 199*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_lane_low_f16( 200*207e5cccSFangrui Song // CHECK-NEXT: entry: 201*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 202*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 203*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 204*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 205*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 206*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 207*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 208*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 209*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8 210*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2 211*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8 212*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2 213*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8 214*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2 215*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8 216*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2 217*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 218*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 219*207e5cccSFangrui Song // 
CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2 220*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 221*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 222*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 223*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 224*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 225*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2 226*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 227*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 228*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 229*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 230*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 231*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 232*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 233*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 234*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 235*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 236*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 237*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2 238*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr 
[[__REINT1_84725]], align 2 239*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 240*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 241*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8 242*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8 243*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2 244*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2 245*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2 246*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 247*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8 248*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8 249*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2 250*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2 251*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2 252*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 253*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8 254*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8 255*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2 256*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2 257*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], 
align 2 258*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 259*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8 260*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8 261*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2 262*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2 263*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2 264*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 265*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 266*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 267*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 268*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 269*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]] 270*207e5cccSFangrui Song // 271*207e5cccSFangrui Song float32x4_t test_vfmlalq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) { 272*207e5cccSFangrui Song return vfmlalq_lane_low_f16(a, b, c, 2); 273*207e5cccSFangrui Song } 274*207e5cccSFangrui Song 275*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_lane_high_f16( 276*207e5cccSFangrui Song // CHECK-NEXT: entry: 277*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 278*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 279*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 280*207e5cccSFangrui Song // 
CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 281*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 282*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 283*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 284*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 285*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8 286*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2 287*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8 288*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2 289*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8 290*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2 291*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8 292*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2 293*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 294*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 295*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3 296*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 297*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 298*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 299*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 300*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 301*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = 
extractelement <4 x i16> [[TMP2]], i32 3 302*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 303*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 304*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 305*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 306*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 307*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 308*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 309*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 310*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 311*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 312*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 313*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3 314*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 315*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 316*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 317*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8 318*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8 319*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3 320*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], 
align 2 321*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2 322*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 323*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8 324*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8 325*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3 326*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2 327*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2 328*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 329*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8 330*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8 331*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3 332*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2 333*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2 334*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 335*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8 336*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8 337*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3 338*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2 339*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2 
340*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 341*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 342*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 343*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 344*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 345*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]] 346*207e5cccSFangrui Song // 347*207e5cccSFangrui Song float32x4_t test_vfmlalq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) { 348*207e5cccSFangrui Song return vfmlalq_lane_high_f16(a, b, c, 3); 349*207e5cccSFangrui Song } 350*207e5cccSFangrui Song 351*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_laneq_low_f16( 352*207e5cccSFangrui Song // CHECK-NEXT: entry: 353*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 354*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 355*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 356*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 357*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 358*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 359*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 360*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 361*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 362*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 363*207e5cccSFangrui 
Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4 364*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 365*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 366*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 367*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 368*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 369*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4 370*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 371*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 372*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 373*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 374*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 375*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4 376*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 377*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 378*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 379*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 380*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 381*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 4 382*207e5cccSFangrui Song // CHECK-NEXT: store i16 
[[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 383*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 384*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 385*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 386*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 387*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 388*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 389*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_LOW3_I]] 390*207e5cccSFangrui Song // 391*207e5cccSFangrui Song float32x2_t test_vfmlal_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) { 392*207e5cccSFangrui Song return vfmlal_laneq_low_f16(a, b, c, 4); 393*207e5cccSFangrui Song } 394*207e5cccSFangrui Song 395*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlal_laneq_high_f16( 396*207e5cccSFangrui Song // CHECK-NEXT: entry: 397*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 398*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 399*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 400*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 401*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 402*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 403*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 404*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 405*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr 
[[__REINT_850]], align 16 406*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 407*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5 408*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 409*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 410*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 411*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 412*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 413*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5 414*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 415*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 416*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 417*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 418*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 419*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5 420*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 421*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 422*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 423*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 424*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 
425*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5 426*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 427*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 428*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 429*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 430*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 431*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 432*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlal2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 433*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLAL_HIGH3_I]] 434*207e5cccSFangrui Song // 435*207e5cccSFangrui Song float32x2_t test_vfmlal_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) { 436*207e5cccSFangrui Song return vfmlal_laneq_high_f16(a, b, c, 5); 437*207e5cccSFangrui Song } 438*207e5cccSFangrui Song 439*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_laneq_low_f16( 440*207e5cccSFangrui Song // CHECK-NEXT: entry: 441*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 442*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 443*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 444*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 445*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 446*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 447*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 
448*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 449*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16 450*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2 451*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16 452*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2 453*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16 454*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2 455*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16 456*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2 457*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 458*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 459*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6 460*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 461*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 462*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 463*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 464*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 465*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6 466*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 467*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 468*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> 
[[VECINIT]], half [[TMP3]], i32 1 469*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 470*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 471*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6 472*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 473*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 474*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 475*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 476*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 477*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6 478*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 479*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 480*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 481*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16 482*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16 483*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6 484*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2 485*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2 486*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 487*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr 
[[__REINT_85044]], align 16 488*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16 489*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6 490*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2 491*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2 492*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 493*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16 494*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16 495*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6 496*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2 497*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2 498*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 499*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16 500*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16 501*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6 502*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2 503*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2 504*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 505*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 506*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x 
half> [[B:%.*]] to <16 x i8> 507*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 508*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 509*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_LOW3_I]] 510*207e5cccSFangrui Song // 511*207e5cccSFangrui Song float32x4_t test_vfmlalq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 512*207e5cccSFangrui Song return vfmlalq_laneq_low_f16(a, b, c, 6); 513*207e5cccSFangrui Song } 514*207e5cccSFangrui Song 515*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlalq_laneq_high_f16( 516*207e5cccSFangrui Song // CHECK-NEXT: entry: 517*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 518*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 519*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 520*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 521*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 522*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 523*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 524*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 525*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16 526*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2 527*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16 528*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2 529*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16 530*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85055:%.*]] = 
alloca i16, align 2 531*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16 532*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2 533*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 534*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 535*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7 536*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 537*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 538*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 539*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 540*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 541*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 542*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 543*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 544*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 545*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 546*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 547*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7 548*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 549*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 550*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = 
insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 551*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 552*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 553*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7 554*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 555*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 556*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 557*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16 558*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16 559*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7 560*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2 561*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2 562*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 563*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16 564*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16 565*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7 566*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2 567*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2 568*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 569*207e5cccSFangrui Song // CHECK-NEXT: store <8 
x half> [[C]], ptr [[__REINT_85054]], align 16 570*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16 571*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7 572*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2 573*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2 574*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 575*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16 576*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16 577*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7 578*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2 579*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2 580*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 581*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 582*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 583*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 584*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLAL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 585*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLAL_HIGH3_I]] 586*207e5cccSFangrui Song // 587*207e5cccSFangrui Song float32x4_t test_vfmlalq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 588*207e5cccSFangrui Song return vfmlalq_laneq_high_f16(a, b, c, 7); 
589*207e5cccSFangrui Song } 590*207e5cccSFangrui Song 591*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_lane_low_f16( 592*207e5cccSFangrui Song // CHECK-NEXT: entry: 593*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 594*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 595*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 596*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 597*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 598*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 599*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 600*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 601*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 602*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 603*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0 604*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 605*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 606*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 607*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 608*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 609*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0 610*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 611*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 
612*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 613*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 614*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 615*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 0 616*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 617*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 618*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 619*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 620*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 621*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 0 622*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 623*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 624*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 625*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 626*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 627*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 628*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 629*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]] 630*207e5cccSFangrui Song // 631*207e5cccSFangrui Song float32x2_t 
test_vfmlsl_lane_low_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 632*207e5cccSFangrui Song return vfmlsl_lane_low_f16(a, b, c, 0); 633*207e5cccSFangrui Song } 634*207e5cccSFangrui Song 635*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_lane_high_f16( 636*207e5cccSFangrui Song // CHECK-NEXT: entry: 637*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 638*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 639*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 640*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 641*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 642*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 643*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 644*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 645*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 646*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 647*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1 648*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 649*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 650*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 651*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 652*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 653*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1 654*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr 
[[__REINT1_8475]], align 2 655*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 656*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 657*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 658*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 659*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 1 660*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 661*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 662*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 663*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 664*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 665*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 1 666*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 667*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 668*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 669*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 670*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 671*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 672*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 673*207e5cccSFangrui Song 
// CHECK-NEXT: ret <2 x float> [[VFMLSL_HIGH3_I]] 674*207e5cccSFangrui Song // 675*207e5cccSFangrui Song float32x2_t test_vfmlsl_lane_high_f16(float32x2_t a, float16x4_t b, float16x4_t c) { 676*207e5cccSFangrui Song return vfmlsl_lane_high_f16(a, b, c, 1); 677*207e5cccSFangrui Song } 678*207e5cccSFangrui Song 679*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_lane_low_f16( 680*207e5cccSFangrui Song // CHECK-NEXT: entry: 681*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 682*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 683*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8474:%.*]] = alloca <4 x half>, align 8 684*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 685*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 686*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 687*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 688*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 689*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8 690*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2 691*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8 692*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2 693*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8 694*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2 695*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8 696*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2 697*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 698*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] 
= load <4 x i16>, ptr [[__REINT_847]], align 8 699*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2 700*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 701*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 702*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 703*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 704*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], align 8 705*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 2 706*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 707*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 708*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 709*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 710*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 711*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 712*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 713*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 714*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 715*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 716*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 717*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 2 
718*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 719*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 720*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 721*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8 722*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8 723*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 2 724*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2 725*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2 726*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 727*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8 728*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8 729*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 2 730*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2 731*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2 732*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 733*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8 734*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8 735*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 2 736*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2 737*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2 738*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 739*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8 740*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8 741*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 2 742*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2 743*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2 744*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 745*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 746*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 747*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 748*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 749*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]] 750*207e5cccSFangrui Song // 751*207e5cccSFangrui Song float32x4_t test_vfmlslq_lane_low_f16(float32x4_t a, float16x8_t b, float16x4_t c) { 752*207e5cccSFangrui Song return vfmlslq_lane_low_f16(a, b, c, 2); 753*207e5cccSFangrui Song } 754*207e5cccSFangrui Song 755*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_lane_high_f16( 756*207e5cccSFangrui Song // CHECK-NEXT: entry: 757*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_847:%.*]] = alloca <4 x half>, align 8 758*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_847:%.*]] = alloca i16, align 2 759*207e5cccSFangrui Song // CHECK-NEXT: 
[[__REINT_8474:%.*]] = alloca <4 x half>, align 8 760*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8475:%.*]] = alloca i16, align 2 761*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84714:%.*]] = alloca <4 x half>, align 8 762*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84715:%.*]] = alloca i16, align 2 763*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84724:%.*]] = alloca <4 x half>, align 8 764*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84725:%.*]] = alloca i16, align 2 765*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84734:%.*]] = alloca <4 x half>, align 8 766*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84735:%.*]] = alloca i16, align 2 767*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84744:%.*]] = alloca <4 x half>, align 8 768*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84745:%.*]] = alloca i16, align 2 769*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84754:%.*]] = alloca <4 x half>, align 8 770*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84755:%.*]] = alloca i16, align 2 771*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_84764:%.*]] = alloca <4 x half>, align 8 772*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_84765:%.*]] = alloca i16, align 2 773*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C:%.*]], ptr [[__REINT_847]], align 8 774*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_847]], align 8 775*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3 776*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE]], ptr [[__REINT1_847]], align 2 777*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_847]], align 2 778*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 779*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_8474]], align 8 780*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[__REINT_8474]], 
align 8 781*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 782*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE8]], ptr [[__REINT1_8475]], align 2 783*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8475]], align 2 784*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 785*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84714]], align 8 786*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[__REINT_84714]], align 8 787*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE18:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 788*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE18]], ptr [[__REINT1_84715]], align 2 789*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_84715]], align 2 790*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 791*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84724]], align 8 792*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <4 x i16>, ptr [[__REINT_84724]], align 8 793*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE28:%.*]] = extractelement <4 x i16> [[TMP6]], i32 3 794*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE28]], ptr [[__REINT1_84725]], align 2 795*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_84725]], align 2 796*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 797*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84734]], align 8 798*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <4 x i16>, ptr [[__REINT_84734]], align 8 799*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE38:%.*]] = extractelement <4 x i16> [[TMP8]], i32 3 800*207e5cccSFangrui 
Song // CHECK-NEXT: store i16 [[VGET_LANE38]], ptr [[__REINT1_84735]], align 2 801*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_84735]], align 2 802*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 803*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84744]], align 8 804*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <4 x i16>, ptr [[__REINT_84744]], align 8 805*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE48:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3 806*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE48]], ptr [[__REINT1_84745]], align 2 807*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_84745]], align 2 808*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 809*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84754]], align 8 810*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <4 x i16>, ptr [[__REINT_84754]], align 8 811*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE58:%.*]] = extractelement <4 x i16> [[TMP12]], i32 3 812*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE58]], ptr [[__REINT1_84755]], align 2 813*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_84755]], align 2 814*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 815*207e5cccSFangrui Song // CHECK-NEXT: store <4 x half> [[C]], ptr [[__REINT_84764]], align 8 816*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <4 x i16>, ptr [[__REINT_84764]], align 8 817*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE68:%.*]] = extractelement <4 x i16> [[TMP14]], i32 3 818*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGET_LANE68]], ptr [[__REINT1_84765]], align 2 819*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_84765]], align 2 820*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 821*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 822*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 823*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 824*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 825*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]] 826*207e5cccSFangrui Song // 827*207e5cccSFangrui Song float32x4_t test_vfmlslq_lane_high_f16(float32x4_t a, float16x8_t b, float16x4_t c) { 828*207e5cccSFangrui Song return vfmlslq_lane_high_f16(a, b, c, 3); 829*207e5cccSFangrui Song } 830*207e5cccSFangrui Song 831*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_laneq_low_f16( 832*207e5cccSFangrui Song // CHECK-NEXT: entry: 833*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 834*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 835*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 836*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 837*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 838*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 839*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 840*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 841*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 842*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] 
= load <8 x i16>, ptr [[__REINT_850]], align 16 843*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 4 844*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 845*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 846*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 847*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 848*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 849*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 4 850*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 851*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 852*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 853*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 854*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 855*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 4 856*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 857*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 858*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 859*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 860*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 861*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> 
[[TMP6]], i32 4 862*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 863*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 864*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 865*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 866*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 867*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 868*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 869*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLSL_LOW3_I]] 870*207e5cccSFangrui Song // 871*207e5cccSFangrui Song float32x2_t test_vfmlsl_laneq_low_f16(float32x2_t a, float16x4_t b, float16x8_t c) { 872*207e5cccSFangrui Song return vfmlsl_laneq_low_f16(a, b, c, 4); 873*207e5cccSFangrui Song } 874*207e5cccSFangrui Song 875*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlsl_laneq_high_f16( 876*207e5cccSFangrui Song // CHECK-NEXT: entry: 877*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 878*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 879*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 880*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 881*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 882*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 883*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 884*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 
885*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 886*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 887*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 5 888*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 889*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 890*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[TMP1]], i32 0 891*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 892*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 893*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 5 894*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 895*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 896*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP3]], i32 1 897*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 898*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 899*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 5 900*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 901*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 902*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <4 x half> [[VECINIT11]], half [[TMP5]], i32 2 903*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 904*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 905*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 5 906*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 907*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 908*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <4 x half> [[VECINIT21]], half [[TMP7]], i32 3 909*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 910*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x half> [[B:%.*]] to <8 x i8> 911*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[VECINIT31]] to <8 x i8> 912*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmlsl2.v2f32.v4f16(<2 x float> [[A]], <4 x half> [[B]], <4 x half> [[VECINIT31]]) 913*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VFMLSL_HIGH3_I]] 914*207e5cccSFangrui Song // 915*207e5cccSFangrui Song float32x2_t test_vfmlsl_laneq_high_f16(float32x2_t a, float16x4_t b, float16x8_t c) { 916*207e5cccSFangrui Song return vfmlsl_laneq_high_f16(a, b, c, 5); 917*207e5cccSFangrui Song } 918*207e5cccSFangrui Song 919*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_laneq_low_f16( 920*207e5cccSFangrui Song // CHECK-NEXT: entry: 921*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 922*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 923*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 924*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 925*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 926*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 927*207e5cccSFangrui Song // 
CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 928*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 929*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16 930*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2 931*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16 932*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2 933*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x half>, align 16 934*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2 935*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16 936*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2 937*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 938*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 939*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 6 940*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 941*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 942*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 943*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 944*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 945*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 6 946*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 947*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 948*207e5cccSFangrui 
Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 949*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 950*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 951*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 6 952*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 953*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[__REINT1_85015]], align 2 954*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 955*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 956*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 957*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 6 958*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 959*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 960*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 961*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16 962*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16 963*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 6 964*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2 965*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2 966*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 
967*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16 968*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16 969*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 6 970*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2 971*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2 972*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 973*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16 974*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16 975*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 6 976*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2 977*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2 978*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 979*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16 980*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16 981*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 6 982*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2 983*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2 984*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 985*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 
986*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 987*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 988*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_LOW3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 989*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_LOW3_I]] 990*207e5cccSFangrui Song // 991*207e5cccSFangrui Song float32x4_t test_vfmlslq_laneq_low_f16(float32x4_t a, float16x8_t b, float16x8_t c) { 992*207e5cccSFangrui Song return vfmlslq_laneq_low_f16(a, b, c, 6); 993*207e5cccSFangrui Song } 994*207e5cccSFangrui Song 995*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmlslq_laneq_high_f16( 996*207e5cccSFangrui Song // CHECK-NEXT: entry: 997*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_850:%.*]] = alloca <8 x half>, align 16 998*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_850:%.*]] = alloca i16, align 2 999*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_8504:%.*]] = alloca <8 x half>, align 16 1000*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_8505:%.*]] = alloca i16, align 2 1001*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85014:%.*]] = alloca <8 x half>, align 16 1002*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85015:%.*]] = alloca i16, align 2 1003*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85024:%.*]] = alloca <8 x half>, align 16 1004*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85025:%.*]] = alloca i16, align 2 1005*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85034:%.*]] = alloca <8 x half>, align 16 1006*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85035:%.*]] = alloca i16, align 2 1007*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85044:%.*]] = alloca <8 x half>, align 16 1008*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85045:%.*]] = alloca i16, align 2 1009*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85054:%.*]] = alloca <8 x 
half>, align 16 1010*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85055:%.*]] = alloca i16, align 2 1011*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT_85064:%.*]] = alloca <8 x half>, align 16 1012*207e5cccSFangrui Song // CHECK-NEXT: [[__REINT1_85065:%.*]] = alloca i16, align 2 1013*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C:%.*]], ptr [[__REINT_850]], align 16 1014*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[__REINT_850]], align 16 1015*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP0]], i32 7 1016*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE]], ptr [[__REINT1_850]], align 2 1017*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[__REINT1_850]], align 2 1018*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[TMP1]], i32 0 1019*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_8504]], align 16 1020*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[__REINT_8504]], align 16 1021*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 1022*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE8]], ptr [[__REINT1_8505]], align 2 1023*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[__REINT1_8505]], align 2 1024*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT11:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP3]], i32 1 1025*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85014]], align 16 1026*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[__REINT_85014]], align 16 1027*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE18:%.*]] = extractelement <8 x i16> [[TMP4]], i32 7 1028*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE18]], ptr [[__REINT1_85015]], align 2 1029*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = load 
half, ptr [[__REINT1_85015]], align 2 1030*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT21:%.*]] = insertelement <8 x half> [[VECINIT11]], half [[TMP5]], i32 2 1031*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85024]], align 16 1032*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[__REINT_85024]], align 16 1033*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE28:%.*]] = extractelement <8 x i16> [[TMP6]], i32 7 1034*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE28]], ptr [[__REINT1_85025]], align 2 1035*207e5cccSFangrui Song // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[__REINT1_85025]], align 2 1036*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT31:%.*]] = insertelement <8 x half> [[VECINIT21]], half [[TMP7]], i32 3 1037*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85034]], align 16 1038*207e5cccSFangrui Song // CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr [[__REINT_85034]], align 16 1039*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE38:%.*]] = extractelement <8 x i16> [[TMP8]], i32 7 1040*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE38]], ptr [[__REINT1_85035]], align 2 1041*207e5cccSFangrui Song // CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[__REINT1_85035]], align 2 1042*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT41:%.*]] = insertelement <8 x half> [[VECINIT31]], half [[TMP9]], i32 4 1043*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85044]], align 16 1044*207e5cccSFangrui Song // CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr [[__REINT_85044]], align 16 1045*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE48:%.*]] = extractelement <8 x i16> [[TMP10]], i32 7 1046*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE48]], ptr [[__REINT1_85045]], align 2 1047*207e5cccSFangrui Song // CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[__REINT1_85045]], align 2 1048*207e5cccSFangrui Song // CHECK-NEXT: 
[[VECINIT51:%.*]] = insertelement <8 x half> [[VECINIT41]], half [[TMP11]], i32 5 1049*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85054]], align 16 1050*207e5cccSFangrui Song // CHECK-NEXT: [[TMP12:%.*]] = load <8 x i16>, ptr [[__REINT_85054]], align 16 1051*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE58:%.*]] = extractelement <8 x i16> [[TMP12]], i32 7 1052*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE58]], ptr [[__REINT1_85055]], align 2 1053*207e5cccSFangrui Song // CHECK-NEXT: [[TMP13:%.*]] = load half, ptr [[__REINT1_85055]], align 2 1054*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT61:%.*]] = insertelement <8 x half> [[VECINIT51]], half [[TMP13]], i32 6 1055*207e5cccSFangrui Song // CHECK-NEXT: store <8 x half> [[C]], ptr [[__REINT_85064]], align 16 1056*207e5cccSFangrui Song // CHECK-NEXT: [[TMP14:%.*]] = load <8 x i16>, ptr [[__REINT_85064]], align 16 1057*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE68:%.*]] = extractelement <8 x i16> [[TMP14]], i32 7 1058*207e5cccSFangrui Song // CHECK-NEXT: store i16 [[VGETQ_LANE68]], ptr [[__REINT1_85065]], align 2 1059*207e5cccSFangrui Song // CHECK-NEXT: [[TMP15:%.*]] = load half, ptr [[__REINT1_85065]], align 2 1060*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT71:%.*]] = insertelement <8 x half> [[VECINIT61]], half [[TMP15]], i32 7 1061*207e5cccSFangrui Song // CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 1062*207e5cccSFangrui Song // CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x half> [[B:%.*]] to <16 x i8> 1063*207e5cccSFangrui Song // CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x half> [[VECINIT71]] to <16 x i8> 1064*207e5cccSFangrui Song // CHECK-NEXT: [[VFMLSL_HIGH3_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmlsl2.v4f32.v8f16(<4 x float> [[A]], <8 x half> [[B]], <8 x half> [[VECINIT71]]) 1065*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VFMLSL_HIGH3_I]] 1066*207e5cccSFangrui Song // 1067*207e5cccSFangrui Song float32x4_t 
test_vfmlslq_laneq_high_f16(float32x4_t a, float16x8_t b, float16x8_t c) { /* Calls vfmlslq_laneq_high_f16 with lane index 7, the last element of the 8 x half vector c. The assertions for this test expect element 7 of c to be extracted and inserted into elements 0..7 of a new <8 x half> vector, which is then passed with a and b to @llvm.aarch64.neon.fmlsl2.v4f32.v8f16. The assertions are autogenerated by utils/update_cc_test_checks.py: regenerate them instead of editing by hand if this body changes. */ 1068*207e5cccSFangrui Song return vfmlslq_laneq_high_f16(a, b, c, 7); 1069*207e5cccSFangrui Song } 1070