1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ 3*207e5cccSFangrui Song // RUN: -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,dce -S | FileCheck %s 4*207e5cccSFangrui Song 5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 6*207e5cccSFangrui Song 7*207e5cccSFangrui Song #include <arm_neon.h> 8*207e5cccSFangrui Song 9*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlah_laneq_s16( 10*207e5cccSFangrui Song // CHECK-NEXT: entry: 11*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 12*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 14*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) 15*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]] 16*207e5cccSFangrui Song // 17*207e5cccSFangrui Song int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 18*207e5cccSFangrui Song return vqrdmlah_laneq_s16(a, b, v, 7); 19*207e5cccSFangrui Song } 20*207e5cccSFangrui Song 21*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlah_laneq_s32( 22*207e5cccSFangrui Song // CHECK-NEXT: entry: 23*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 24*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 25*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 26*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) 27*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]] 28*207e5cccSFangrui Song // 29*207e5cccSFangrui Song int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 30*207e5cccSFangrui Song return vqrdmlah_laneq_s32(a, b, v, 3); 31*207e5cccSFangrui Song } 32*207e5cccSFangrui Song 33*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahq_laneq_s16( 34*207e5cccSFangrui Song // CHECK-NEXT: entry: 35*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 36*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 37*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 38*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) 39*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]] 40*207e5cccSFangrui Song // 41*207e5cccSFangrui Song int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 42*207e5cccSFangrui Song return vqrdmlahq_laneq_s16(a, b, v, 7); 43*207e5cccSFangrui Song } 44*207e5cccSFangrui Song 45*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahq_laneq_s32( 46*207e5cccSFangrui Song // CHECK-NEXT: entry: 47*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 48*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 49*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 50*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) 51*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]] 52*207e5cccSFangrui Song // 53*207e5cccSFangrui Song int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 54*207e5cccSFangrui Song return vqrdmlahq_laneq_s32(a, b, v, 3); 55*207e5cccSFangrui Song } 56*207e5cccSFangrui Song 57*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_s16( 58*207e5cccSFangrui Song // CHECK-NEXT: entry: 59*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 60*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 61*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0 62*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 63*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 64*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 65*207e5cccSFangrui Song // 66*207e5cccSFangrui Song int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) { 67*207e5cccSFangrui Song return vqrdmlahh_s16(a, b, c); 68*207e5cccSFangrui Song } 69*207e5cccSFangrui Song 70*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_s32( 71*207e5cccSFangrui Song // CHECK-NEXT: entry: 72*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) 73*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 74*207e5cccSFangrui Song // 75*207e5cccSFangrui Song int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) { 76*207e5cccSFangrui Song return vqrdmlahs_s32(a, b, c); 77*207e5cccSFangrui Song } 78*207e5cccSFangrui Song 79*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_lane_s16( 80*207e5cccSFangrui Song // CHECK-NEXT: entry: 81*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 82*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 83*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 84*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0 85*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 86*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 87*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 88*207e5cccSFangrui Song // 89*207e5cccSFangrui Song int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) { 90*207e5cccSFangrui Song return vqrdmlahh_lane_s16(a, b, c, 3); 91*207e5cccSFangrui Song } 92*207e5cccSFangrui Song 93*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_lane_s32( 94*207e5cccSFangrui Song // CHECK-NEXT: entry: 95*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 96*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]]) 97*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 98*207e5cccSFangrui Song // 99*207e5cccSFangrui Song int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) { 100*207e5cccSFangrui Song return vqrdmlahs_lane_s32(a, b, c, 1); 101*207e5cccSFangrui Song } 102*207e5cccSFangrui Song 103*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahh_laneq_s16( 104*207e5cccSFangrui Song // CHECK-NEXT: entry: 105*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 106*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 107*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 108*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0 109*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 110*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 111*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 112*207e5cccSFangrui Song // 113*207e5cccSFangrui Song int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { 114*207e5cccSFangrui Song return vqrdmlahh_laneq_s16(a, b, c, 7); 115*207e5cccSFangrui Song } 116*207e5cccSFangrui Song 117*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlahs_laneq_s32( 118*207e5cccSFangrui Song // CHECK-NEXT: entry: 119*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 120*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]]) 121*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 122*207e5cccSFangrui Song // 123*207e5cccSFangrui Song int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { 124*207e5cccSFangrui Song return vqrdmlahs_laneq_s32(a, b, c, 3); 125*207e5cccSFangrui Song } 126*207e5cccSFangrui Song 127*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlsh_laneq_s16( 128*207e5cccSFangrui Song // CHECK-NEXT: entry: 129*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 130*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 131*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 132*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) 133*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]] 134*207e5cccSFangrui Song // 135*207e5cccSFangrui Song int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 136*207e5cccSFangrui Song return vqrdmlsh_laneq_s16(a, b, v, 7); 137*207e5cccSFangrui Song } 138*207e5cccSFangrui Song 139*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlsh_laneq_s32( 140*207e5cccSFangrui Song // CHECK-NEXT: entry: 141*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 142*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 143*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 144*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) 145*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]] 146*207e5cccSFangrui Song // 147*207e5cccSFangrui Song int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 148*207e5cccSFangrui Song return vqrdmlsh_laneq_s32(a, b, v, 3); 149*207e5cccSFangrui Song } 150*207e5cccSFangrui Song 151*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshq_laneq_s16( 152*207e5cccSFangrui Song // CHECK-NEXT: entry: 153*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 154*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 155*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 156*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) 157*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]] 158*207e5cccSFangrui Song // 159*207e5cccSFangrui Song int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 160*207e5cccSFangrui Song return vqrdmlshq_laneq_s16(a, b, v, 7); 161*207e5cccSFangrui Song } 162*207e5cccSFangrui Song 163*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshq_laneq_s32( 164*207e5cccSFangrui Song // CHECK-NEXT: entry: 165*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 166*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 167*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 168*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) 169*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]] 170*207e5cccSFangrui Song // 171*207e5cccSFangrui Song int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 172*207e5cccSFangrui Song return vqrdmlshq_laneq_s32(a, b, v, 3); 173*207e5cccSFangrui Song } 174*207e5cccSFangrui Song 175*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_s16( 176*207e5cccSFangrui Song // CHECK-NEXT: entry: 177*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 178*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 179*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0 180*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 181*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 182*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 183*207e5cccSFangrui Song // 184*207e5cccSFangrui Song int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) { 185*207e5cccSFangrui Song return vqrdmlshh_s16(a, b, c); 186*207e5cccSFangrui Song } 187*207e5cccSFangrui Song 188*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_s32( 189*207e5cccSFangrui Song // CHECK-NEXT: entry: 190*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) 191*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 192*207e5cccSFangrui Song // 193*207e5cccSFangrui Song int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) { 194*207e5cccSFangrui Song return vqrdmlshs_s32(a, b, c); 195*207e5cccSFangrui Song } 196*207e5cccSFangrui Song 197*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_lane_s16( 198*207e5cccSFangrui Song // CHECK-NEXT: entry: 199*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 200*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 201*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 202*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0 203*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 204*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 205*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 206*207e5cccSFangrui Song // 207*207e5cccSFangrui Song int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) { 208*207e5cccSFangrui Song return vqrdmlshh_lane_s16(a, b, c, 3); 209*207e5cccSFangrui Song } 210*207e5cccSFangrui Song 211*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_lane_s32( 212*207e5cccSFangrui Song // CHECK-NEXT: entry: 213*207e5cccSFangrui Song // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 214*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]]) 215*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 216*207e5cccSFangrui Song // 217*207e5cccSFangrui Song int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) { 218*207e5cccSFangrui Song return vqrdmlshs_lane_s32(a, b, c, 1); 219*207e5cccSFangrui Song } 220*207e5cccSFangrui Song 221*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshh_laneq_s16( 222*207e5cccSFangrui Song // CHECK-NEXT: entry: 223*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 224*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 225*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 226*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0 227*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 228*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 229*207e5cccSFangrui Song // CHECK-NEXT: ret i16 [[TMP3]] 230*207e5cccSFangrui Song // 231*207e5cccSFangrui Song int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { 232*207e5cccSFangrui Song return vqrdmlshh_laneq_s16(a, b, c, 7); 233*207e5cccSFangrui Song } 234*207e5cccSFangrui Song 235*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmlshs_laneq_s32( 236*207e5cccSFangrui Song // CHECK-NEXT: entry: 237*207e5cccSFangrui Song // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 238*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]]) 239*207e5cccSFangrui Song // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 240*207e5cccSFangrui Song // 241*207e5cccSFangrui Song int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { 242*207e5cccSFangrui Song return vqrdmlshs_laneq_s32(a, b, c, 3); 243*207e5cccSFangrui Song } 244