// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_neon.h>

// Each function below wraps exactly one by-lane NEON intrinsic, always using
// the highest valid lane index of the vector `v`. The comment block in front
// of every function pins the IR expected after mem2reg; those comments are
// produced by the update script named on the first line and should be
// regenerated with it rather than edited by hand.

// Integer multiply-accumulate by lane: a + b * v[lane].

// CHECK-LABEL: @test_vmla_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i16> [[ADD]]
//
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
  return vmla_lane_s16(a, b, v, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <8 x i16> [[ADD]]
//
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
  return vmlaq_lane_s16(a, b, v, 3);
}

// CHECK-LABEL: @test_vmla_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <2 x i32> [[ADD]]
//
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
  return vmla_lane_s32(a, b, v, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i32> [[ADD]]
//
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
  return vmlaq_lane_s32(a, b, v, 1);
}

// CHECK-LABEL: @test_vmla_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i16> [[ADD]]
//
int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
  return vmla_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: @test_vmlaq_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <8 x i16> [[ADD]]
//
int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
  return vmlaq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: @test_vmla_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <2 x i32> [[ADD]]
//
int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
  return vmla_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: @test_vmlaq_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i32> [[ADD]]
//
int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
  return vmlaq_laneq_s32(a, b, v, 3);
}

// Integer multiply-subtract by lane: a - b * v[lane].

// CHECK-LABEL: @test_vmls_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i16> [[SUB]]
//
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
  return vmls_lane_s16(a, b, v, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <8 x i16> [[SUB]]
//
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
  return vmlsq_lane_s16(a, b, v, 3);
}

// CHECK-LABEL: @test_vmls_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <2 x i32> [[SUB]]
//
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
  return vmls_lane_s32(a, b, v, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i32> [[SUB]]
//
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
  return vmlsq_lane_s32(a, b, v, 1);
}

// CHECK-LABEL: @test_vmls_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i16> [[SUB]]
//
int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
  return vmls_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: @test_vmlsq_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <8 x i16> [[SUB]]
//
int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
  return vmlsq_laneq_s16(a, b, v, 7);
}

// CHECK-LABEL: @test_vmls_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <2 x i32> [[SUB]]
//
int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
  return vmls_laneq_s32(a, b, v, 3);
}

// CHECK-LABEL: @test_vmlsq_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK-NEXT:    ret <4 x i32> [[SUB]]
//
int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
  return vmlsq_laneq_s32(a, b, v, 3);
}

// Integer multiply by lane: a * v[lane]. The signed and unsigned variants are
// expected to produce the same `mul` IR.

// CHECK-LABEL: @test_vmul_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i16> [[MUL]]
//
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) {
  return vmul_lane_s16(a, v, 3);
}

// CHECK-LABEL: @test_vmulq_lane_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <8 x i16> [[MUL]]
//
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) {
  return vmulq_lane_s16(a, v, 3);
}

// CHECK-LABEL: @test_vmul_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <2 x i32> [[MUL]]
//
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) {
  return vmul_lane_s32(a, v, 1);
}

// CHECK-LABEL: @test_vmulq_lane_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i32> [[MUL]]
//
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) {
  return vmulq_lane_s32(a, v, 1);
}

// CHECK-LABEL: @test_vmul_lane_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i16> [[MUL]]
//
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) {
  return vmul_lane_u16(a, v, 3);
}

// CHECK-LABEL: @test_vmulq_lane_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <8 x i16> [[MUL]]
//
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) {
  return vmulq_lane_u16(a, v, 3);
}

// CHECK-LABEL: @test_vmul_lane_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <2 x i32> [[MUL]]
//
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) {
  return vmul_lane_u32(a, v, 1);
}

// CHECK-LABEL: @test_vmulq_lane_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i32> [[MUL]]
//
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) {
  return vmulq_lane_u32(a, v, 1);
}

// CHECK-LABEL: @test_vmul_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i16> [[MUL]]
//
int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) {
  return vmul_laneq_s16(a, v, 7);
}

// CHECK-LABEL: @test_vmulq_laneq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <8 x i16> [[MUL]]
//
int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) {
  return vmulq_laneq_s16(a, v, 7);
}

// CHECK-LABEL: @test_vmul_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <2 x i32> [[MUL]]
//
int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) {
  return vmul_laneq_s32(a, v, 3);
}

// CHECK-LABEL: @test_vmulq_laneq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i32> [[MUL]]
//
int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) {
  return vmulq_laneq_s32(a, v, 3);
}

// CHECK-LABEL: @test_vmul_laneq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i16> [[MUL]]
//
uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) {
  return vmul_laneq_u16(a, v, 7);
}

// CHECK-LABEL: @test_vmulq_laneq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK-NEXT:    [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <8 x i16> [[MUL]]
//
uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) {
  return vmulq_laneq_u16(a, v, 7);
}

// CHECK-LABEL: @test_vmul_laneq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <2 x i32> [[MUL]]
//
uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) {
  return vmul_laneq_u32(a, v, 3);
}

// CHECK-LABEL: @test_vmulq_laneq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK-NEXT:    ret <4 x i32> [[MUL]]
//
uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
  return vmulq_laneq_u32(a, v, 3);
}

// Floating-point fused multiply-add by lane, expected to lower to a call to
// the llvm.fma intrinsic with `a` as the addend.

// CHECK-LABEL: @test_vfma_lane_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK-NEXT:    [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT:    [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT:    ret <2 x float> [[FMLA2]]
//
float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vfma_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: @test_vfmaq_lane_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK-NEXT:    [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT:    [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT:    ret <4 x float> [[FMLA2]]
//
float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vfmaq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: @test_vfma_laneq_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
// CHECK-NEXT:    ret <2 x float> [[TMP6]]
//
float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
  return vfma_laneq_f32(a, b, v, 3);
}

// CHECK-LABEL: @test_vfmaq_laneq_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]])
// CHECK-NEXT:    ret <4 x float> [[TMP6]]
//
float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
  return vfmaq_laneq_f32(a, b, v, 3);
}

// Floating-point fused multiply-subtract by lane, expected to lower to an
// fneg of `b` followed by the same llvm.fma call shape as the vfma tests.

// CHECK-LABEL: @test_vfms_lane_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1>
// CHECK-NEXT:    [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK-NEXT:    [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT:    [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
// CHECK-NEXT:    ret <2 x float> [[FMLA2]]
//
float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
  return vfms_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: @test_vfmsq_lane_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]]
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK-NEXT:    [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK-NEXT:    [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK-NEXT:    [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
// CHECK-NEXT:    ret <4 x float> [[FMLA2]]
//
float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
  return vfmsq_lane_f32(a, b, v, 1);
}

// CHECK-LABEL: @test_vfms_laneq_f32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]]
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
// CHECK-NEXT:    [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> <i32 3, i32 3>
// CHECK-NEXT:    [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]])
517*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[TMP6]] 518*207e5cccSFangrui Song // 519*207e5cccSFangrui Song float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { 520*207e5cccSFangrui Song return vfms_laneq_f32(a, b, v, 3); 521*207e5cccSFangrui Song } 522*207e5cccSFangrui Song 523*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_laneq_f32( 524*207e5cccSFangrui Song // CHECK-NEXT: entry: 525*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]] 526*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 527*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8> 528*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 529*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 530*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 531*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 532*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 533*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 534*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[TMP6]] 535*207e5cccSFangrui Song // 536*207e5cccSFangrui Song float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { 537*207e5cccSFangrui Song return vfmsq_laneq_f32(a, b, v, 3); 538*207e5cccSFangrui Song } 539*207e5cccSFangrui Song 540*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_lane_f64( 541*207e5cccSFangrui Song // CHECK-NEXT: entry: 542*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 543*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = 
bitcast <2 x double> [[B:%.*]] to <16 x i8> 544*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 545*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 546*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer 547*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 548*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 549*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) 550*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[FMLA2]] 551*207e5cccSFangrui Song // 552*207e5cccSFangrui Song float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { 553*207e5cccSFangrui Song return vfmaq_lane_f64(a, b, v, 0); 554*207e5cccSFangrui Song } 555*207e5cccSFangrui Song 556*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_laneq_f64( 557*207e5cccSFangrui Song // CHECK-NEXT: entry: 558*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 559*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> 560*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 561*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 562*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 563*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 564*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> 565*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> 
@llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 566*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[TMP6]] 567*207e5cccSFangrui Song // 568*207e5cccSFangrui Song float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { 569*207e5cccSFangrui Song return vfmaq_laneq_f64(a, b, v, 1); 570*207e5cccSFangrui Song } 571*207e5cccSFangrui Song 572*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_lane_f64( 573*207e5cccSFangrui Song // CHECK-NEXT: entry: 574*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[B:%.*]] 575*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 576*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8> 577*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 578*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> 579*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer 580*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 581*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 582*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]]) 583*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[FMLA2]] 584*207e5cccSFangrui Song // 585*207e5cccSFangrui Song float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { 586*207e5cccSFangrui Song return vfmsq_lane_f64(a, b, v, 0); 587*207e5cccSFangrui Song } 588*207e5cccSFangrui Song 589*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_laneq_f64( 590*207e5cccSFangrui Song // CHECK-NEXT: entry: 591*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x 
double> [[B:%.*]] 592*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 593*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8> 594*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 595*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 596*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 597*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 598*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> <i32 1, i32 1> 599*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 600*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[TMP6]] 601*207e5cccSFangrui Song // 602*207e5cccSFangrui Song float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { 603*207e5cccSFangrui Song return vfmsq_laneq_f64(a, b, v, 1); 604*207e5cccSFangrui Song } 605*207e5cccSFangrui Song 606*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmas_laneq_f32( 607*207e5cccSFangrui Song // CHECK-NEXT: entry: 608*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[V:%.*]], i32 3 609*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[B:%.*]], float [[EXTRACT]], float [[A:%.*]]) 610*207e5cccSFangrui Song // CHECK-NEXT: ret float [[TMP0]] 611*207e5cccSFangrui Song // 612*207e5cccSFangrui Song float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { 613*207e5cccSFangrui Song return vfmas_laneq_f32(a, b, v, 3); 614*207e5cccSFangrui Song } 615*207e5cccSFangrui Song 616*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsd_lane_f64( 617*207e5cccSFangrui Song // CHECK-NEXT: entry: 
618*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]] 619*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[V:%.*]], i32 0 620*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[FNEG]], double [[EXTRACT]], double [[A:%.*]]) 621*207e5cccSFangrui Song // CHECK-NEXT: ret double [[TMP0]] 622*207e5cccSFangrui Song // 623*207e5cccSFangrui Song float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { 624*207e5cccSFangrui Song return vfmsd_lane_f64(a, b, v, 0); 625*207e5cccSFangrui Song } 626*207e5cccSFangrui Song 627*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmss_laneq_f32( 628*207e5cccSFangrui Song // CHECK-NEXT: entry: 629*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg float [[B:%.*]] 630*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[V:%.*]], i32 3 631*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = call float @llvm.fma.f32(float [[FNEG]], float [[EXTRACT]], float [[A:%.*]]) 632*207e5cccSFangrui Song // CHECK-NEXT: ret float [[TMP0]] 633*207e5cccSFangrui Song // 634*207e5cccSFangrui Song float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { 635*207e5cccSFangrui Song return vfmss_laneq_f32(a, b, v, 3); 636*207e5cccSFangrui Song } 637*207e5cccSFangrui Song 638*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsd_laneq_f64( 639*207e5cccSFangrui Song // CHECK-NEXT: entry: 640*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg double [[B:%.*]] 641*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[V:%.*]], i32 1 642*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.fma.f64(double [[FNEG]], double [[EXTRACT]], double [[A:%.*]]) 643*207e5cccSFangrui Song // CHECK-NEXT: ret double [[TMP0]] 644*207e5cccSFangrui Song // 645*207e5cccSFangrui Song float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { 646*207e5cccSFangrui 
Song return vfmsd_laneq_f64(a, b, v, 1); 647*207e5cccSFangrui Song } 648*207e5cccSFangrui Song 649*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_s16( 650*207e5cccSFangrui Song // CHECK-NEXT: entry: 651*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 652*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 653*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 654*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 655*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 656*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 657*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 658*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 659*207e5cccSFangrui Song // 660*207e5cccSFangrui Song int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 661*207e5cccSFangrui Song return vmlal_lane_s16(a, b, v, 3); 662*207e5cccSFangrui Song } 663*207e5cccSFangrui Song 664*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_s32( 665*207e5cccSFangrui Song // CHECK-NEXT: entry: 666*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 667*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 668*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 669*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 670*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 671*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 672*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 673*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 674*207e5cccSFangrui Song // 675*207e5cccSFangrui Song int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 676*207e5cccSFangrui Song return vmlal_lane_s32(a, b, v, 1); 677*207e5cccSFangrui Song } 678*207e5cccSFangrui Song 679*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_s16( 680*207e5cccSFangrui Song // CHECK-NEXT: entry: 681*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 682*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 683*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 684*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 685*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 686*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 687*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 688*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 689*207e5cccSFangrui Song // 690*207e5cccSFangrui Song int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 691*207e5cccSFangrui Song return vmlal_laneq_s16(a, b, v, 7); 692*207e5cccSFangrui Song } 693*207e5cccSFangrui Song 694*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_s32( 695*207e5cccSFangrui Song // CHECK-NEXT: entry: 696*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 697*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 698*207e5cccSFangrui Song // CHECK-NEXT: 
[[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 699*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 700*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 701*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 702*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 703*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 704*207e5cccSFangrui Song // 705*207e5cccSFangrui Song int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 706*207e5cccSFangrui Song return vmlal_laneq_s32(a, b, v, 3); 707*207e5cccSFangrui Song } 708*207e5cccSFangrui Song 709*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_s16( 710*207e5cccSFangrui Song // CHECK-NEXT: entry: 711*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 712*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 713*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 714*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 715*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 716*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 717*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 718*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 719*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 720*207e5cccSFangrui Song // 721*207e5cccSFangrui Song 
int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 722*207e5cccSFangrui Song return vmlal_high_lane_s16(a, b, v, 3); 723*207e5cccSFangrui Song } 724*207e5cccSFangrui Song 725*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_s32( 726*207e5cccSFangrui Song // CHECK-NEXT: entry: 727*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 728*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 729*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 730*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 731*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 732*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 733*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 734*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 735*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 736*207e5cccSFangrui Song // 737*207e5cccSFangrui Song int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 738*207e5cccSFangrui Song return vmlal_high_lane_s32(a, b, v, 1); 739*207e5cccSFangrui Song } 740*207e5cccSFangrui Song 741*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_s16( 742*207e5cccSFangrui Song // CHECK-NEXT: entry: 743*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 744*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 745*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 
746*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 747*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 748*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 749*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 750*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 751*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 752*207e5cccSFangrui Song // 753*207e5cccSFangrui Song int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 754*207e5cccSFangrui Song return vmlal_high_laneq_s16(a, b, v, 7); 755*207e5cccSFangrui Song } 756*207e5cccSFangrui Song 757*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_s32( 758*207e5cccSFangrui Song // CHECK-NEXT: entry: 759*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 760*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 761*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 762*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 763*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 764*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 765*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 766*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 767*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 
768*207e5cccSFangrui Song // 769*207e5cccSFangrui Song int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 770*207e5cccSFangrui Song return vmlal_high_laneq_s32(a, b, v, 3); 771*207e5cccSFangrui Song } 772*207e5cccSFangrui Song 773*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_s16( 774*207e5cccSFangrui Song // CHECK-NEXT: entry: 775*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 776*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 777*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 778*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 779*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 780*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 781*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 782*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 783*207e5cccSFangrui Song // 784*207e5cccSFangrui Song int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 785*207e5cccSFangrui Song return vmlsl_lane_s16(a, b, v, 3); 786*207e5cccSFangrui Song } 787*207e5cccSFangrui Song 788*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_s32( 789*207e5cccSFangrui Song // CHECK-NEXT: entry: 790*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 791*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 792*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 793*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 794*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 795*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 796*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 797*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 798*207e5cccSFangrui Song // 799*207e5cccSFangrui Song int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 800*207e5cccSFangrui Song return vmlsl_lane_s32(a, b, v, 1); 801*207e5cccSFangrui Song } 802*207e5cccSFangrui Song 803*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_s16( 804*207e5cccSFangrui Song // CHECK-NEXT: entry: 805*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 806*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 807*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 808*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 809*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 810*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 811*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 812*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 813*207e5cccSFangrui Song // 814*207e5cccSFangrui Song int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 815*207e5cccSFangrui Song return vmlsl_laneq_s16(a, b, v, 7); 816*207e5cccSFangrui Song } 817*207e5cccSFangrui Song 818*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_s32( 819*207e5cccSFangrui Song // CHECK-NEXT: entry: 820*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x 
i8> 821*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 822*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 823*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 824*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 825*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 826*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 827*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 828*207e5cccSFangrui Song // 829*207e5cccSFangrui Song int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 830*207e5cccSFangrui Song return vmlsl_laneq_s32(a, b, v, 3); 831*207e5cccSFangrui Song } 832*207e5cccSFangrui Song 833*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_s16( 834*207e5cccSFangrui Song // CHECK-NEXT: entry: 835*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 836*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 837*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 838*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 839*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 840*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 841*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 842*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> 
[[A:%.*]], [[VMULL2_I]] 843*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 844*207e5cccSFangrui Song // 845*207e5cccSFangrui Song int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 846*207e5cccSFangrui Song return vmlsl_high_lane_s16(a, b, v, 3); 847*207e5cccSFangrui Song } 848*207e5cccSFangrui Song 849*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_s32( 850*207e5cccSFangrui Song // CHECK-NEXT: entry: 851*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 852*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 853*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 854*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 855*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 856*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 857*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 858*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 859*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 860*207e5cccSFangrui Song // 861*207e5cccSFangrui Song int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 862*207e5cccSFangrui Song return vmlsl_high_lane_s32(a, b, v, 1); 863*207e5cccSFangrui Song } 864*207e5cccSFangrui Song 865*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_s16( 866*207e5cccSFangrui Song // CHECK-NEXT: entry: 867*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 868*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <8 x i16> [[V:%.*]] to <16 x i8> 869*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 870*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 871*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 872*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 873*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 874*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 875*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 876*207e5cccSFangrui Song // 877*207e5cccSFangrui Song int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 878*207e5cccSFangrui Song return vmlsl_high_laneq_s16(a, b, v, 7); 879*207e5cccSFangrui Song } 880*207e5cccSFangrui Song 881*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_s32( 882*207e5cccSFangrui Song // CHECK-NEXT: entry: 883*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 884*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 885*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 886*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 887*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 888*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 889*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 890*207e5cccSFangrui Song // 
CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 891*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 892*207e5cccSFangrui Song // 893*207e5cccSFangrui Song int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 894*207e5cccSFangrui Song return vmlsl_high_laneq_s32(a, b, v, 3); 895*207e5cccSFangrui Song } 896*207e5cccSFangrui Song 897*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_u16( 898*207e5cccSFangrui Song // CHECK-NEXT: entry: 899*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 900*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 901*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 902*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 903*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 904*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 905*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 906*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 907*207e5cccSFangrui Song // 908*207e5cccSFangrui Song int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { 909*207e5cccSFangrui Song return vmlal_lane_u16(a, b, v, 3); 910*207e5cccSFangrui Song } 911*207e5cccSFangrui Song 912*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_u32( 913*207e5cccSFangrui Song // CHECK-NEXT: entry: 914*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 915*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 916*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 
1> 917*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 918*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 919*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 920*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 921*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 922*207e5cccSFangrui Song // 923*207e5cccSFangrui Song int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { 924*207e5cccSFangrui Song return vmlal_lane_u32(a, b, v, 1); 925*207e5cccSFangrui Song } 926*207e5cccSFangrui Song 927*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_u16( 928*207e5cccSFangrui Song // CHECK-NEXT: entry: 929*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 930*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 931*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 932*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 933*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 934*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 935*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 936*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 937*207e5cccSFangrui Song // 938*207e5cccSFangrui Song int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { 939*207e5cccSFangrui Song return vmlal_laneq_u16(a, b, v, 7); 940*207e5cccSFangrui Song } 941*207e5cccSFangrui Song 942*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_u32( 
943*207e5cccSFangrui Song // CHECK-NEXT: entry: 944*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 945*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 946*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 947*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 948*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 949*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 950*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 951*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 952*207e5cccSFangrui Song // 953*207e5cccSFangrui Song int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { 954*207e5cccSFangrui Song return vmlal_laneq_u32(a, b, v, 3); 955*207e5cccSFangrui Song } 956*207e5cccSFangrui Song 957*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_u16( 958*207e5cccSFangrui Song // CHECK-NEXT: entry: 959*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 960*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 961*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 962*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 963*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 964*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 965*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 966*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 967*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 968*207e5cccSFangrui Song // 969*207e5cccSFangrui Song int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { 970*207e5cccSFangrui Song return vmlal_high_lane_u16(a, b, v, 3); 971*207e5cccSFangrui Song } 972*207e5cccSFangrui Song 973*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_u32( 974*207e5cccSFangrui Song // CHECK-NEXT: entry: 975*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 976*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 977*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 978*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 979*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 980*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 981*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 982*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 983*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 984*207e5cccSFangrui Song // 985*207e5cccSFangrui Song int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { 986*207e5cccSFangrui Song return vmlal_high_lane_u32(a, b, v, 1); 987*207e5cccSFangrui Song } 988*207e5cccSFangrui Song 989*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_u16( 990*207e5cccSFangrui Song // CHECK-NEXT: entry: 991*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = 
shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 992*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 993*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 994*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 995*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 996*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 997*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 998*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 999*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 1000*207e5cccSFangrui Song // 1001*207e5cccSFangrui Song int32x4_t test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { 1002*207e5cccSFangrui Song return vmlal_high_laneq_u16(a, b, v, 7); 1003*207e5cccSFangrui Song } 1004*207e5cccSFangrui Song 1005*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_u32( 1006*207e5cccSFangrui Song // CHECK-NEXT: entry: 1007*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 1008*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1009*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1010*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1011*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1012*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 
1013*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1014*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 1015*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 1016*207e5cccSFangrui Song // 1017*207e5cccSFangrui Song int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { 1018*207e5cccSFangrui Song return vmlal_high_laneq_u32(a, b, v, 3); 1019*207e5cccSFangrui Song } 1020*207e5cccSFangrui Song 1021*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_u16( 1022*207e5cccSFangrui Song // CHECK-NEXT: entry: 1023*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1024*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1025*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1026*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 1027*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1028*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 1029*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 1030*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 1031*207e5cccSFangrui Song // 1032*207e5cccSFangrui Song int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { 1033*207e5cccSFangrui Song return vmlsl_lane_u16(a, b, v, 3); 1034*207e5cccSFangrui Song } 1035*207e5cccSFangrui Song 1036*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_u32( 1037*207e5cccSFangrui Song // CHECK-NEXT: entry: 1038*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1039*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1040*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1041*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 1042*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1043*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 1044*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 1045*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 1046*207e5cccSFangrui Song // 1047*207e5cccSFangrui Song int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { 1048*207e5cccSFangrui Song return vmlsl_lane_u32(a, b, v, 1); 1049*207e5cccSFangrui Song } 1050*207e5cccSFangrui Song 1051*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_u16( 1052*207e5cccSFangrui Song // CHECK-NEXT: entry: 1053*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1054*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1055*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1056*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 1057*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1058*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 1059*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 1060*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 1061*207e5cccSFangrui Song // 1062*207e5cccSFangrui Song int32x4_t 
test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { 1063*207e5cccSFangrui Song return vmlsl_laneq_u16(a, b, v, 7); 1064*207e5cccSFangrui Song } 1065*207e5cccSFangrui Song 1066*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_u32( 1067*207e5cccSFangrui Song // CHECK-NEXT: entry: 1068*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1069*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1070*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1071*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 1072*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1073*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 1074*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 1075*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 1076*207e5cccSFangrui Song // 1077*207e5cccSFangrui Song int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { 1078*207e5cccSFangrui Song return vmlsl_laneq_u32(a, b, v, 3); 1079*207e5cccSFangrui Song } 1080*207e5cccSFangrui Song 1081*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_u16( 1082*207e5cccSFangrui Song // CHECK-NEXT: entry: 1083*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1084*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1085*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1086*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 
1087*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1088*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1089*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1090*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 1091*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 1092*207e5cccSFangrui Song // 1093*207e5cccSFangrui Song int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { 1094*207e5cccSFangrui Song return vmlsl_high_lane_u16(a, b, v, 3); 1095*207e5cccSFangrui Song } 1096*207e5cccSFangrui Song 1097*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_u32( 1098*207e5cccSFangrui Song // CHECK-NEXT: entry: 1099*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 1100*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1101*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1102*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1103*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1104*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1105*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1106*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 1107*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 1108*207e5cccSFangrui Song // 1109*207e5cccSFangrui Song int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { 
1110*207e5cccSFangrui Song return vmlsl_high_lane_u32(a, b, v, 1); 1111*207e5cccSFangrui Song } 1112*207e5cccSFangrui Song 1113*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_u16( 1114*207e5cccSFangrui Song // CHECK-NEXT: entry: 1115*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1116*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1117*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1118*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1119*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1120*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1121*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1122*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 1123*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 1124*207e5cccSFangrui Song // 1125*207e5cccSFangrui Song int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { 1126*207e5cccSFangrui Song return vmlsl_high_laneq_u16(a, b, v, 7); 1127*207e5cccSFangrui Song } 1128*207e5cccSFangrui Song 1129*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_u32( 1130*207e5cccSFangrui Song // CHECK-NEXT: entry: 1131*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 1132*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1133*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1134*207e5cccSFangrui Song // 
CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1135*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1136*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1137*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1138*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 1139*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 1140*207e5cccSFangrui Song // 1141*207e5cccSFangrui Song int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { 1142*207e5cccSFangrui Song return vmlsl_high_laneq_u32(a, b, v, 3); 1143*207e5cccSFangrui Song } 1144*207e5cccSFangrui Song 1145*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_s16( 1146*207e5cccSFangrui Song // CHECK-NEXT: entry: 1147*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1148*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1149*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1150*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1151*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1152*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1153*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1154*207e5cccSFangrui Song // 1155*207e5cccSFangrui Song int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { 1156*207e5cccSFangrui Song return vmull_lane_s16(a, v, 3); 1157*207e5cccSFangrui Song } 1158*207e5cccSFangrui Song 1159*207e5cccSFangrui Song // 
CHECK-LABEL: @test_vmull_lane_s32( 1160*207e5cccSFangrui Song // CHECK-NEXT: entry: 1161*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1162*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1163*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1164*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1165*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1166*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1167*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1168*207e5cccSFangrui Song // 1169*207e5cccSFangrui Song int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { 1170*207e5cccSFangrui Song return vmull_lane_s32(a, v, 1); 1171*207e5cccSFangrui Song } 1172*207e5cccSFangrui Song 1173*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_u16( 1174*207e5cccSFangrui Song // CHECK-NEXT: entry: 1175*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1176*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1177*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1178*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1179*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1180*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1181*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1182*207e5cccSFangrui Song // 1183*207e5cccSFangrui Song uint32x4_t 
test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { 1184*207e5cccSFangrui Song return vmull_lane_u16(a, v, 3); 1185*207e5cccSFangrui Song } 1186*207e5cccSFangrui Song 1187*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_u32( 1188*207e5cccSFangrui Song // CHECK-NEXT: entry: 1189*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1190*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1191*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1192*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1193*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1194*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1195*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1196*207e5cccSFangrui Song // 1197*207e5cccSFangrui Song uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { 1198*207e5cccSFangrui Song return vmull_lane_u32(a, v, 1); 1199*207e5cccSFangrui Song } 1200*207e5cccSFangrui Song 1201*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_s16( 1202*207e5cccSFangrui Song // CHECK-NEXT: entry: 1203*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1204*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1205*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1206*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1207*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1208*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1209*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1210*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1211*207e5cccSFangrui Song // 1212*207e5cccSFangrui Song int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { 1213*207e5cccSFangrui Song return vmull_high_lane_s16(a, v, 3); 1214*207e5cccSFangrui Song } 1215*207e5cccSFangrui Song 1216*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_s32( 1217*207e5cccSFangrui Song // CHECK-NEXT: entry: 1218*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1219*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1220*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1221*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1222*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1223*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1224*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1225*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1226*207e5cccSFangrui Song // 1227*207e5cccSFangrui Song int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { 1228*207e5cccSFangrui Song return vmull_high_lane_s32(a, v, 1); 1229*207e5cccSFangrui Song } 1230*207e5cccSFangrui Song 1231*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_u16( 1232*207e5cccSFangrui Song // CHECK-NEXT: entry: 1233*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> 
[[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1234*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1235*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1236*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1237*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1238*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1239*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1240*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1241*207e5cccSFangrui Song // 1242*207e5cccSFangrui Song uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { 1243*207e5cccSFangrui Song return vmull_high_lane_u16(a, v, 3); 1244*207e5cccSFangrui Song } 1245*207e5cccSFangrui Song 1246*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_u32( 1247*207e5cccSFangrui Song // CHECK-NEXT: entry: 1248*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1249*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1250*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1251*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1252*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1253*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1254*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 
1255*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1256*207e5cccSFangrui Song // 1257*207e5cccSFangrui Song uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { 1258*207e5cccSFangrui Song return vmull_high_lane_u32(a, v, 1); 1259*207e5cccSFangrui Song } 1260*207e5cccSFangrui Song 1261*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_s16( 1262*207e5cccSFangrui Song // CHECK-NEXT: entry: 1263*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1264*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1265*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1266*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1267*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1268*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1269*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1270*207e5cccSFangrui Song // 1271*207e5cccSFangrui Song int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { 1272*207e5cccSFangrui Song return vmull_laneq_s16(a, v, 7); 1273*207e5cccSFangrui Song } 1274*207e5cccSFangrui Song 1275*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_s32( 1276*207e5cccSFangrui Song // CHECK-NEXT: entry: 1277*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1278*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1279*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1280*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1281*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1282*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1283*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1284*207e5cccSFangrui Song // 1285*207e5cccSFangrui Song int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { 1286*207e5cccSFangrui Song return vmull_laneq_s32(a, v, 3); 1287*207e5cccSFangrui Song } 1288*207e5cccSFangrui Song 1289*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_u16( 1290*207e5cccSFangrui Song // CHECK-NEXT: entry: 1291*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1292*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1293*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1294*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1295*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1296*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1297*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1298*207e5cccSFangrui Song // 1299*207e5cccSFangrui Song uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { 1300*207e5cccSFangrui Song return vmull_laneq_u16(a, v, 7); 1301*207e5cccSFangrui Song } 1302*207e5cccSFangrui Song 1303*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_u32( 1304*207e5cccSFangrui Song // CHECK-NEXT: entry: 1305*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1306*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1307*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> 
[[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1308*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1309*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1310*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1311*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1312*207e5cccSFangrui Song // 1313*207e5cccSFangrui Song uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { 1314*207e5cccSFangrui Song return vmull_laneq_u32(a, v, 3); 1315*207e5cccSFangrui Song } 1316*207e5cccSFangrui Song 1317*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_s16( 1318*207e5cccSFangrui Song // CHECK-NEXT: entry: 1319*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1320*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1321*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1322*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1323*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1324*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1325*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1326*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1327*207e5cccSFangrui Song // 1328*207e5cccSFangrui Song int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { 1329*207e5cccSFangrui Song return vmull_high_laneq_s16(a, v, 7); 1330*207e5cccSFangrui Song } 1331*207e5cccSFangrui Song 1332*207e5cccSFangrui 
Song // CHECK-LABEL: @test_vmull_high_laneq_s32( 1333*207e5cccSFangrui Song // CHECK-NEXT: entry: 1334*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1335*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1336*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1337*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1338*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1339*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1340*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1341*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1342*207e5cccSFangrui Song // 1343*207e5cccSFangrui Song int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { 1344*207e5cccSFangrui Song return vmull_high_laneq_s32(a, v, 3); 1345*207e5cccSFangrui Song } 1346*207e5cccSFangrui Song 1347*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_u16( 1348*207e5cccSFangrui Song // CHECK-NEXT: entry: 1349*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1350*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1351*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1352*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1353*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1354*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1355*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1356*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 1357*207e5cccSFangrui Song // 1358*207e5cccSFangrui Song uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { 1359*207e5cccSFangrui Song return vmull_high_laneq_u16(a, v, 7); 1360*207e5cccSFangrui Song } 1361*207e5cccSFangrui Song 1362*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_u32( 1363*207e5cccSFangrui Song // CHECK-NEXT: entry: 1364*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1365*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1366*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1367*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1368*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1369*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1370*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1371*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 1372*207e5cccSFangrui Song // 1373*207e5cccSFangrui Song uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { 1374*207e5cccSFangrui Song return vmull_high_laneq_u32(a, v, 3); 1375*207e5cccSFangrui Song } 1376*207e5cccSFangrui Song 1377*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_lane_s16( 1378*207e5cccSFangrui Song // CHECK-NEXT: entry: 1379*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 
1380*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1381*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1382*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1383*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 1384*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1385*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 1386*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 1387*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 1388*207e5cccSFangrui Song // 1389*207e5cccSFangrui Song int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 1390*207e5cccSFangrui Song return vqdmlal_lane_s16(a, b, v, 3); 1391*207e5cccSFangrui Song } 1392*207e5cccSFangrui Song 1393*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_lane_s32( 1394*207e5cccSFangrui Song // CHECK-NEXT: entry: 1395*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1396*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1397*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1398*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 1399*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 1400*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1401*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 1402*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 1403*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 1404*207e5cccSFangrui Song // 1405*207e5cccSFangrui Song int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 1406*207e5cccSFangrui Song return vqdmlal_lane_s32(a, b, v, 1); 1407*207e5cccSFangrui Song } 1408*207e5cccSFangrui Song 1409*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_lane_s16( 1410*207e5cccSFangrui Song // CHECK-NEXT: entry: 1411*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1412*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1413*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1414*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1415*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1416*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1417*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1418*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1419*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 1420*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 1421*207e5cccSFangrui Song // 1422*207e5cccSFangrui Song int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 1423*207e5cccSFangrui 
Song return vqdmlal_high_lane_s16(a, b, v, 3); 1424*207e5cccSFangrui Song } 1425*207e5cccSFangrui Song 1426*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_lane_s32( 1427*207e5cccSFangrui Song // CHECK-NEXT: entry: 1428*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 1429*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1430*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1431*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1432*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 1433*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1434*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1435*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1436*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 1437*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 1438*207e5cccSFangrui Song // 1439*207e5cccSFangrui Song int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 1440*207e5cccSFangrui Song return vqdmlal_high_lane_s32(a, b, v, 1); 1441*207e5cccSFangrui Song } 1442*207e5cccSFangrui Song 1443*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_lane_s16( 1444*207e5cccSFangrui Song // CHECK-NEXT: entry: 1445*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1446*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1447*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = 
shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1448*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1449*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 1450*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1451*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 1452*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 1453*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 1454*207e5cccSFangrui Song // 1455*207e5cccSFangrui Song int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { 1456*207e5cccSFangrui Song return vqdmlsl_lane_s16(a, b, v, 3); 1457*207e5cccSFangrui Song } 1458*207e5cccSFangrui Song 1459*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_lane_s32( 1460*207e5cccSFangrui Song // CHECK-NEXT: entry: 1461*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1462*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1463*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1464*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 1465*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 1466*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1467*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 1468*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 1469*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 1470*207e5cccSFangrui Song // 1471*207e5cccSFangrui Song int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { 1472*207e5cccSFangrui Song return vqdmlsl_lane_s32(a, b, v, 1); 1473*207e5cccSFangrui Song } 1474*207e5cccSFangrui Song 1475*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_lane_s16( 1476*207e5cccSFangrui Song // CHECK-NEXT: entry: 1477*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1478*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1479*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1480*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1481*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1482*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1483*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1484*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1485*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 1486*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 1487*207e5cccSFangrui Song // 1488*207e5cccSFangrui Song int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { 1489*207e5cccSFangrui Song return vqdmlsl_high_lane_s16(a, b, v, 3); 1490*207e5cccSFangrui Song } 1491*207e5cccSFangrui Song 1492*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vqdmlsl_high_lane_s32( 1493*207e5cccSFangrui Song // CHECK-NEXT: entry: 1494*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 1495*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1496*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1497*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1498*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 1499*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1500*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1501*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1502*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 1503*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 1504*207e5cccSFangrui Song // 1505*207e5cccSFangrui Song int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { 1506*207e5cccSFangrui Song return vqdmlsl_high_lane_s32(a, b, v, 1); 1507*207e5cccSFangrui Song } 1508*207e5cccSFangrui Song 1509*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_lane_s16( 1510*207e5cccSFangrui Song // CHECK-NEXT: entry: 1511*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1512*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1513*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1514*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = 
bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1515*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1516*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1517*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1518*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 1519*207e5cccSFangrui Song // 1520*207e5cccSFangrui Song int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { 1521*207e5cccSFangrui Song return vqdmull_lane_s16(a, v, 3); 1522*207e5cccSFangrui Song } 1523*207e5cccSFangrui Song 1524*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_lane_s32( 1525*207e5cccSFangrui Song // CHECK-NEXT: entry: 1526*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1527*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1528*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1529*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1530*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1531*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1532*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1533*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 1534*207e5cccSFangrui Song // 1535*207e5cccSFangrui Song int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { 1536*207e5cccSFangrui Song return vqdmull_lane_s32(a, v, 1); 1537*207e5cccSFangrui Song } 1538*207e5cccSFangrui Song 1539*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_laneq_s16( 
1540*207e5cccSFangrui Song // CHECK-NEXT: entry: 1541*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1542*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1543*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1544*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1545*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1546*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 1547*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1548*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 1549*207e5cccSFangrui Song // 1550*207e5cccSFangrui Song int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { 1551*207e5cccSFangrui Song return vqdmull_laneq_s16(a, v, 3); 1552*207e5cccSFangrui Song } 1553*207e5cccSFangrui Song 1554*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_laneq_s32( 1555*207e5cccSFangrui Song // CHECK-NEXT: entry: 1556*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1557*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1558*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1559*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1560*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1561*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 1562*207e5cccSFangrui Song // CHECK-NEXT: 
[[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1563*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 1564*207e5cccSFangrui Song // 1565*207e5cccSFangrui Song int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { 1566*207e5cccSFangrui Song return vqdmull_laneq_s32(a, v, 3); 1567*207e5cccSFangrui Song } 1568*207e5cccSFangrui Song 1569*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_lane_s16( 1570*207e5cccSFangrui Song // CHECK-NEXT: entry: 1571*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1572*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1573*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1574*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1575*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1576*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1577*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1578*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1579*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 1580*207e5cccSFangrui Song // 1581*207e5cccSFangrui Song int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { 1582*207e5cccSFangrui Song return vqdmull_high_lane_s16(a, v, 3); 1583*207e5cccSFangrui Song } 1584*207e5cccSFangrui Song 1585*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_lane_s32( 1586*207e5cccSFangrui Song // CHECK-NEXT: entry: 1587*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], 
<4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1588*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1589*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1590*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 1591*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1592*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1593*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1594*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1595*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 1596*207e5cccSFangrui Song // 1597*207e5cccSFangrui Song int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { 1598*207e5cccSFangrui Song return vqdmull_high_lane_s32(a, v, 1); 1599*207e5cccSFangrui Song } 1600*207e5cccSFangrui Song 1601*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_laneq_s16( 1602*207e5cccSFangrui Song // CHECK-NEXT: entry: 1603*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1604*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1605*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1606*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 1607*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 1608*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 1609*207e5cccSFangrui Song 
// CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 1610*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 1611*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 1612*207e5cccSFangrui Song // 1613*207e5cccSFangrui Song int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { 1614*207e5cccSFangrui Song return vqdmull_high_laneq_s16(a, v, 7); 1615*207e5cccSFangrui Song } 1616*207e5cccSFangrui Song 1617*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_laneq_s32( 1618*207e5cccSFangrui Song // CHECK-NEXT: entry: 1619*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 1620*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 1621*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1622*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 1623*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 1624*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 1625*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 1626*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 1627*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 1628*207e5cccSFangrui Song // 1629*207e5cccSFangrui Song int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { 1630*207e5cccSFangrui Song return vqdmull_high_laneq_s32(a, v, 3); 1631*207e5cccSFangrui Song } 1632*207e5cccSFangrui Song 1633*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vqdmulh_lane_s16( 1634*207e5cccSFangrui Song // CHECK-NEXT: entry: 1635*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1636*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1637*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1638*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1639*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> [[VQDMULH_LANE_V]], <4 x i16> [[VQDMULH_LANE_V1]], i32 3) 1640*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQDMULH_LANE_V2]] 1641*207e5cccSFangrui Song // 1642*207e5cccSFangrui Song int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { 1643*207e5cccSFangrui Song return vqdmulh_lane_s16(a, v, 3); 1644*207e5cccSFangrui Song } 1645*207e5cccSFangrui Song 1646*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_lane_s16( 1647*207e5cccSFangrui Song // CHECK-NEXT: entry: 1648*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 1649*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1650*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1651*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1652*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16> [[VQDMULHQ_LANE_V]], <4 x i16> [[VQDMULHQ_LANE_V1]], i32 3) 1653*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_LANE_V2]] 1654*207e5cccSFangrui Song // 1655*207e5cccSFangrui Song int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { 1656*207e5cccSFangrui Song return vqdmulhq_lane_s16(a, v, 3); 1657*207e5cccSFangrui Song } 
1658*207e5cccSFangrui Song 1659*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_lane_s32( 1660*207e5cccSFangrui Song // CHECK-NEXT: entry: 1661*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1662*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1663*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1664*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1665*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32> [[VQDMULH_LANE_V]], <2 x i32> [[VQDMULH_LANE_V1]], i32 1) 1666*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQDMULH_LANE_V2]] 1667*207e5cccSFangrui Song // 1668*207e5cccSFangrui Song int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { 1669*207e5cccSFangrui Song return vqdmulh_lane_s32(a, v, 1); 1670*207e5cccSFangrui Song } 1671*207e5cccSFangrui Song 1672*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_lane_s32( 1673*207e5cccSFangrui Song // CHECK-NEXT: entry: 1674*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1675*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1676*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1677*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1678*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32> [[VQDMULHQ_LANE_V]], <2 x i32> [[VQDMULHQ_LANE_V1]], i32 1) 1679*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_LANE_V2]] 1680*207e5cccSFangrui Song // 1681*207e5cccSFangrui Song int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { 1682*207e5cccSFangrui 
Song return vqdmulhq_lane_s32(a, v, 1); 1683*207e5cccSFangrui Song } 1684*207e5cccSFangrui Song 1685*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_lane_s16( 1686*207e5cccSFangrui Song // CHECK-NEXT: entry: 1687*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 1688*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1689*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1690*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1691*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16> [[VQRDMULH_LANE_V]], <4 x i16> [[VQRDMULH_LANE_V1]], i32 3) 1692*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMULH_LANE_V2]] 1693*207e5cccSFangrui Song // 1694*207e5cccSFangrui Song int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { 1695*207e5cccSFangrui Song return vqrdmulh_lane_s16(a, v, 3); 1696*207e5cccSFangrui Song } 1697*207e5cccSFangrui Song 1698*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_lane_s16( 1699*207e5cccSFangrui Song // CHECK-NEXT: entry: 1700*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 1701*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1702*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1703*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 1704*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16> [[VQRDMULHQ_LANE_V]], <4 x i16> [[VQRDMULHQ_LANE_V1]], i32 3) 1705*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_LANE_V2]] 1706*207e5cccSFangrui Song // 1707*207e5cccSFangrui 
Song int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { 1708*207e5cccSFangrui Song return vqrdmulhq_lane_s16(a, v, 3); 1709*207e5cccSFangrui Song } 1710*207e5cccSFangrui Song 1711*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_lane_s32( 1712*207e5cccSFangrui Song // CHECK-NEXT: entry: 1713*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 1714*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1715*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1716*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1717*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32> [[VQRDMULH_LANE_V]], <2 x i32> [[VQRDMULH_LANE_V1]], i32 1) 1718*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMULH_LANE_V2]] 1719*207e5cccSFangrui Song // 1720*207e5cccSFangrui Song int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { 1721*207e5cccSFangrui Song return vqrdmulh_lane_s32(a, v, 1); 1722*207e5cccSFangrui Song } 1723*207e5cccSFangrui Song 1724*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_lane_s32( 1725*207e5cccSFangrui Song // CHECK-NEXT: entry: 1726*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 1727*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1728*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 1729*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 1730*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32> [[VQRDMULHQ_LANE_V]], <2 x i32> [[VQRDMULHQ_LANE_V1]], i32 1) 1731*207e5cccSFangrui Song // CHECK-NEXT: 
ret <4 x i32> [[VQRDMULHQ_LANE_V2]] 1732*207e5cccSFangrui Song // 1733*207e5cccSFangrui Song int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t v) { 1734*207e5cccSFangrui Song return vqrdmulhq_lane_s32(a, v, 1); 1735*207e5cccSFangrui Song } 1736*207e5cccSFangrui Song 1737*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_f32( 1738*207e5cccSFangrui Song // CHECK-NEXT: entry: 1739*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 1740*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1741*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1> 1742*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]] 1743*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[MUL]] 1744*207e5cccSFangrui Song // 1745*207e5cccSFangrui Song float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) { 1746*207e5cccSFangrui Song return vmul_lane_f32(a, v, 1); 1747*207e5cccSFangrui Song } 1748*207e5cccSFangrui Song 1749*207e5cccSFangrui Song 1750*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_f64( 1751*207e5cccSFangrui Song // CHECK-NEXT: entry: 1752*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8> 1753*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 1754*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 1755*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 1756*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 1757*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 1758*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 1759*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> 
[[TMP5]] 1760*207e5cccSFangrui Song // 1761*207e5cccSFangrui Song float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) { 1762*207e5cccSFangrui Song return vmul_lane_f64(a, v, 0); 1763*207e5cccSFangrui Song } 1764*207e5cccSFangrui Song 1765*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_f32( 1766*207e5cccSFangrui Song // CHECK-NEXT: entry: 1767*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 1768*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1769*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1770*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]] 1771*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[MUL]] 1772*207e5cccSFangrui Song // 1773*207e5cccSFangrui Song float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) { 1774*207e5cccSFangrui Song return vmulq_lane_f32(a, v, 1); 1775*207e5cccSFangrui Song } 1776*207e5cccSFangrui Song 1777*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_f64( 1778*207e5cccSFangrui Song // CHECK-NEXT: entry: 1779*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 1780*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1781*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer 1782*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]] 1783*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[MUL]] 1784*207e5cccSFangrui Song // 1785*207e5cccSFangrui Song float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) { 1786*207e5cccSFangrui Song return vmulq_lane_f64(a, v, 0); 1787*207e5cccSFangrui Song } 1788*207e5cccSFangrui Song 1789*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmul_laneq_f32( 1790*207e5cccSFangrui Song // CHECK-NEXT: entry: 1791*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 1792*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1793*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3> 1794*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]] 1795*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[MUL]] 1796*207e5cccSFangrui Song // 1797*207e5cccSFangrui Song float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) { 1798*207e5cccSFangrui Song return vmul_laneq_f32(a, v, 3); 1799*207e5cccSFangrui Song } 1800*207e5cccSFangrui Song 1801*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_f64( 1802*207e5cccSFangrui Song // CHECK-NEXT: entry: 1803*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8> 1804*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 1805*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 1806*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 1807*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 1808*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 1809*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 1810*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> [[TMP5]] 1811*207e5cccSFangrui Song // 1812*207e5cccSFangrui Song float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) { 1813*207e5cccSFangrui Song return vmul_laneq_f64(a, v, 1); 1814*207e5cccSFangrui Song } 1815*207e5cccSFangrui Song 1816*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_f32( 1817*207e5cccSFangrui 
Song // CHECK-NEXT: entry: 1818*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 1819*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1820*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1821*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]] 1822*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[MUL]] 1823*207e5cccSFangrui Song // 1824*207e5cccSFangrui Song float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) { 1825*207e5cccSFangrui Song return vmulq_laneq_f32(a, v, 3); 1826*207e5cccSFangrui Song } 1827*207e5cccSFangrui Song 1828*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_f64( 1829*207e5cccSFangrui Song // CHECK-NEXT: entry: 1830*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 1831*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1832*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 1> 1833*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]] 1834*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[MUL]] 1835*207e5cccSFangrui Song // 1836*207e5cccSFangrui Song float64x2_t test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { 1837*207e5cccSFangrui Song return vmulq_laneq_f64(a, v, 1); 1838*207e5cccSFangrui Song } 1839*207e5cccSFangrui Song 1840*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulx_lane_f32( 1841*207e5cccSFangrui Song // CHECK-NEXT: entry: 1842*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 1843*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1844*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] 
= shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1> 1845*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 1846*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8> 1847*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]]) 1848*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VMULX2_I]] 1849*207e5cccSFangrui Song // 1850*207e5cccSFangrui Song float32x2_t test_vmulx_lane_f32(float32x2_t a, float32x2_t v) { 1851*207e5cccSFangrui Song return vmulx_lane_f32(a, v, 1); 1852*207e5cccSFangrui Song } 1853*207e5cccSFangrui Song 1854*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_lane_f32( 1855*207e5cccSFangrui Song // CHECK-NEXT: entry: 1856*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 1857*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 1858*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1859*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 1860*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8> 1861*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]]) 1862*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VMULX2_I]] 1863*207e5cccSFangrui Song // 1864*207e5cccSFangrui Song float32x4_t test_vmulxq_lane_f32(float32x4_t a, float32x2_t v) { 1865*207e5cccSFangrui Song return vmulxq_lane_f32(a, v, 1); 1866*207e5cccSFangrui Song } 1867*207e5cccSFangrui Song 1868*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_lane_f64( 1869*207e5cccSFangrui Song // CHECK-NEXT: entry: 1870*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 1871*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 1872*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer 1873*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 1874*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8> 1875*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]]) 1876*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[VMULX2_I]] 1877*207e5cccSFangrui Song // 1878*207e5cccSFangrui Song float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { 1879*207e5cccSFangrui Song return vmulxq_lane_f64(a, v, 0); 1880*207e5cccSFangrui Song } 1881*207e5cccSFangrui Song 1882*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulx_laneq_f32( 1883*207e5cccSFangrui Song // CHECK-NEXT: entry: 1884*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 1885*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1886*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> <i32 3, i32 3> 1887*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 1888*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8> 1889*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]]) 1890*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VMULX2_I]] 1891*207e5cccSFangrui Song // 1892*207e5cccSFangrui Song float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { 1893*207e5cccSFangrui 
Song return vmulx_laneq_f32(a, v, 3); 1894*207e5cccSFangrui Song } 1895*207e5cccSFangrui Song 1896*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_laneq_f32( 1897*207e5cccSFangrui Song // CHECK-NEXT: entry: 1898*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 1899*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 1900*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1901*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 1902*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8> 1903*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]]) 1904*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VMULX2_I]] 1905*207e5cccSFangrui Song // 1906*207e5cccSFangrui Song float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { 1907*207e5cccSFangrui Song return vmulxq_laneq_f32(a, v, 3); 1908*207e5cccSFangrui Song } 1909*207e5cccSFangrui Song 1910*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_laneq_f64( 1911*207e5cccSFangrui Song // CHECK-NEXT: entry: 1912*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 1913*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 1914*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> <i32 1, i32 1> 1915*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 1916*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8> 1917*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x 
double> [[A]], <2 x double> [[LANE]]) 1918*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[VMULX2_I]] 1919*207e5cccSFangrui Song // 1920*207e5cccSFangrui Song float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { 1921*207e5cccSFangrui Song return vmulxq_laneq_f64(a, v, 1); 1922*207e5cccSFangrui Song } 1923*207e5cccSFangrui Song 1924*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_lane_s16_0( 1925*207e5cccSFangrui Song // CHECK-NEXT: entry: 1926*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1927*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1928*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 1929*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 1930*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 1931*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 1932*207e5cccSFangrui Song // 1933*207e5cccSFangrui Song int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { 1934*207e5cccSFangrui Song return vmla_lane_s16(a, b, v, 0); 1935*207e5cccSFangrui Song } 1936*207e5cccSFangrui Song 1937*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_s16_0( 1938*207e5cccSFangrui Song // CHECK-NEXT: entry: 1939*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 1940*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 1941*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> zeroinitializer 1942*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 1943*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 1944*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 1945*207e5cccSFangrui Song 
// 1946*207e5cccSFangrui Song int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { 1947*207e5cccSFangrui Song return vmlaq_lane_s16(a, b, v, 0); 1948*207e5cccSFangrui Song } 1949*207e5cccSFangrui Song 1950*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_lane_s32_0( 1951*207e5cccSFangrui Song // CHECK-NEXT: entry: 1952*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1953*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1954*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 1955*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 1956*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 1957*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 1958*207e5cccSFangrui Song // 1959*207e5cccSFangrui Song int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { 1960*207e5cccSFangrui Song return vmla_lane_s32(a, b, v, 0); 1961*207e5cccSFangrui Song } 1962*207e5cccSFangrui Song 1963*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_s32_0( 1964*207e5cccSFangrui Song // CHECK-NEXT: entry: 1965*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 1966*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 1967*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 1968*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 1969*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 1970*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 1971*207e5cccSFangrui Song // 1972*207e5cccSFangrui Song int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { 1973*207e5cccSFangrui 
Song return vmlaq_lane_s32(a, b, v, 0); 1974*207e5cccSFangrui Song } 1975*207e5cccSFangrui Song 1976*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_laneq_s16_0( 1977*207e5cccSFangrui Song // CHECK-NEXT: entry: 1978*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1979*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1980*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 1981*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 1982*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 1983*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 1984*207e5cccSFangrui Song // 1985*207e5cccSFangrui Song int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { 1986*207e5cccSFangrui Song return vmla_laneq_s16(a, b, v, 0); 1987*207e5cccSFangrui Song } 1988*207e5cccSFangrui Song 1989*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_s16_0( 1990*207e5cccSFangrui Song // CHECK-NEXT: entry: 1991*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 1992*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 1993*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 1994*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 1995*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 1996*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 1997*207e5cccSFangrui Song // 1998*207e5cccSFangrui Song int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { 1999*207e5cccSFangrui Song return vmlaq_laneq_s16(a, b, v, 0); 2000*207e5cccSFangrui Song } 2001*207e5cccSFangrui Song 2002*207e5cccSFangrui 
Song // CHECK-LABEL: @test_vmla_laneq_s32_0( 2003*207e5cccSFangrui Song // CHECK-NEXT: entry: 2004*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2005*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2006*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2007*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 2008*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 2009*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 2010*207e5cccSFangrui Song // 2011*207e5cccSFangrui Song int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { 2012*207e5cccSFangrui Song return vmla_laneq_s32(a, b, v, 0); 2013*207e5cccSFangrui Song } 2014*207e5cccSFangrui Song 2015*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_s32_0( 2016*207e5cccSFangrui Song // CHECK-NEXT: entry: 2017*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2018*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2019*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 2020*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 2021*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 2022*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2023*207e5cccSFangrui Song // 2024*207e5cccSFangrui Song int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { 2025*207e5cccSFangrui Song return vmlaq_laneq_s32(a, b, v, 0); 2026*207e5cccSFangrui Song } 2027*207e5cccSFangrui Song 2028*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_lane_s16_0( 2029*207e5cccSFangrui Song // CHECK-NEXT: entry: 2030*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2031*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2032*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2033*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 2034*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 2035*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 2036*207e5cccSFangrui Song // 2037*207e5cccSFangrui Song int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { 2038*207e5cccSFangrui Song return vmls_lane_s16(a, b, v, 0); 2039*207e5cccSFangrui Song } 2040*207e5cccSFangrui Song 2041*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_s16_0( 2042*207e5cccSFangrui Song // CHECK-NEXT: entry: 2043*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2044*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2045*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> zeroinitializer 2046*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 2047*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 2048*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 2049*207e5cccSFangrui Song // 2050*207e5cccSFangrui Song int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { 2051*207e5cccSFangrui Song return vmlsq_lane_s16(a, b, v, 0); 2052*207e5cccSFangrui Song } 2053*207e5cccSFangrui Song 2054*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_lane_s32_0( 2055*207e5cccSFangrui Song // CHECK-NEXT: entry: 2056*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2057*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2058*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2059*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 2060*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]] 2061*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 2062*207e5cccSFangrui Song // 2063*207e5cccSFangrui Song int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { 2064*207e5cccSFangrui Song return vmls_lane_s32(a, b, v, 0); 2065*207e5cccSFangrui Song } 2066*207e5cccSFangrui Song 2067*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_s32_0( 2068*207e5cccSFangrui Song // CHECK-NEXT: entry: 2069*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2070*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2071*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 2072*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 2073*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 2074*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2075*207e5cccSFangrui Song // 2076*207e5cccSFangrui Song int32x4_t test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { 2077*207e5cccSFangrui Song return vmlsq_lane_s32(a, b, v, 0); 2078*207e5cccSFangrui Song } 2079*207e5cccSFangrui Song 2080*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_s16_0( 2081*207e5cccSFangrui Song // CHECK-NEXT: entry: 2082*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2083*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2084*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector 
<8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2085*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 2086*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 2087*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 2088*207e5cccSFangrui Song // 2089*207e5cccSFangrui Song int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { 2090*207e5cccSFangrui Song return vmls_laneq_s16(a, b, v, 0); 2091*207e5cccSFangrui Song } 2092*207e5cccSFangrui Song 2093*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_s16_0( 2094*207e5cccSFangrui Song // CHECK-NEXT: entry: 2095*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2096*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2097*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 2098*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 2099*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 2100*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 2101*207e5cccSFangrui Song // 2102*207e5cccSFangrui Song int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { 2103*207e5cccSFangrui Song return vmlsq_laneq_s16(a, b, v, 0); 2104*207e5cccSFangrui Song } 2105*207e5cccSFangrui Song 2106*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_s32_0( 2107*207e5cccSFangrui Song // CHECK-NEXT: entry: 2108*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2109*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2110*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2111*207e5cccSFangrui Song // CHECK-NEXT: 
[[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 2112*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]] 2113*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 2114*207e5cccSFangrui Song // 2115*207e5cccSFangrui Song int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { 2116*207e5cccSFangrui Song return vmls_laneq_s32(a, b, v, 0); 2117*207e5cccSFangrui Song } 2118*207e5cccSFangrui Song 2119*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_s32_0( 2120*207e5cccSFangrui Song // CHECK-NEXT: entry: 2121*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2122*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2123*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 2124*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 2125*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 2126*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2127*207e5cccSFangrui Song // 2128*207e5cccSFangrui Song int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { 2129*207e5cccSFangrui Song return vmlsq_laneq_s32(a, b, v, 0); 2130*207e5cccSFangrui Song } 2131*207e5cccSFangrui Song 2132*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_s16_0( 2133*207e5cccSFangrui Song // CHECK-NEXT: entry: 2134*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2135*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2136*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2137*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]] 2138*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[MUL]] 
2139*207e5cccSFangrui Song // 2140*207e5cccSFangrui Song int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) { 2141*207e5cccSFangrui Song return vmul_lane_s16(a, v, 0); 2142*207e5cccSFangrui Song } 2143*207e5cccSFangrui Song 2144*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_s16_0( 2145*207e5cccSFangrui Song // CHECK-NEXT: entry: 2146*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2147*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2148*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> zeroinitializer 2149*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]] 2150*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL]] 2151*207e5cccSFangrui Song // 2152*207e5cccSFangrui Song int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) { 2153*207e5cccSFangrui Song return vmulq_lane_s16(a, v, 0); 2154*207e5cccSFangrui Song } 2155*207e5cccSFangrui Song 2156*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_s32_0( 2157*207e5cccSFangrui Song // CHECK-NEXT: entry: 2158*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2159*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2160*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2161*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]] 2162*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL]] 2163*207e5cccSFangrui Song // 2164*207e5cccSFangrui Song int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) { 2165*207e5cccSFangrui Song return vmul_lane_s32(a, v, 0); 2166*207e5cccSFangrui Song } 2167*207e5cccSFangrui Song 2168*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_s32_0( 2169*207e5cccSFangrui Song // CHECK-NEXT: 
entry: 2170*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2171*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2172*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 2173*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]] 2174*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL]] 2175*207e5cccSFangrui Song // 2176*207e5cccSFangrui Song int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) { 2177*207e5cccSFangrui Song return vmulq_lane_s32(a, v, 0); 2178*207e5cccSFangrui Song } 2179*207e5cccSFangrui Song 2180*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_u16_0( 2181*207e5cccSFangrui Song // CHECK-NEXT: entry: 2182*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2183*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2184*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2185*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]] 2186*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[MUL]] 2187*207e5cccSFangrui Song // 2188*207e5cccSFangrui Song uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) { 2189*207e5cccSFangrui Song return vmul_lane_u16(a, v, 0); 2190*207e5cccSFangrui Song } 2191*207e5cccSFangrui Song 2192*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_u16_0( 2193*207e5cccSFangrui Song // CHECK-NEXT: entry: 2194*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2195*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2196*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> 
zeroinitializer 2197*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]] 2198*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL]] 2199*207e5cccSFangrui Song // 2200*207e5cccSFangrui Song uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) { 2201*207e5cccSFangrui Song return vmulq_lane_u16(a, v, 0); 2202*207e5cccSFangrui Song } 2203*207e5cccSFangrui Song 2204*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_u32_0( 2205*207e5cccSFangrui Song // CHECK-NEXT: entry: 2206*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2207*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2208*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2209*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]] 2210*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL]] 2211*207e5cccSFangrui Song // 2212*207e5cccSFangrui Song uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) { 2213*207e5cccSFangrui Song return vmul_lane_u32(a, v, 0); 2214*207e5cccSFangrui Song } 2215*207e5cccSFangrui Song 2216*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_u32_0( 2217*207e5cccSFangrui Song // CHECK-NEXT: entry: 2218*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2219*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2220*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 2221*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]] 2222*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL]] 2223*207e5cccSFangrui Song // 2224*207e5cccSFangrui Song uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) { 2225*207e5cccSFangrui Song return 
vmulq_lane_u32(a, v, 0); 2226*207e5cccSFangrui Song } 2227*207e5cccSFangrui Song 2228*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_s16_0( 2229*207e5cccSFangrui Song // CHECK-NEXT: entry: 2230*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2231*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2232*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2233*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]] 2234*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[MUL]] 2235*207e5cccSFangrui Song // 2236*207e5cccSFangrui Song int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) { 2237*207e5cccSFangrui Song return vmul_laneq_s16(a, v, 0); 2238*207e5cccSFangrui Song } 2239*207e5cccSFangrui Song 2240*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_s16_0( 2241*207e5cccSFangrui Song // CHECK-NEXT: entry: 2242*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2243*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2244*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 2245*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]] 2246*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL]] 2247*207e5cccSFangrui Song // 2248*207e5cccSFangrui Song int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) { 2249*207e5cccSFangrui Song return vmulq_laneq_s16(a, v, 0); 2250*207e5cccSFangrui Song } 2251*207e5cccSFangrui Song 2252*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_s32_0( 2253*207e5cccSFangrui Song // CHECK-NEXT: entry: 2254*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2255*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2256*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2257*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]] 2258*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL]] 2259*207e5cccSFangrui Song // 2260*207e5cccSFangrui Song int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) { 2261*207e5cccSFangrui Song return vmul_laneq_s32(a, v, 0); 2262*207e5cccSFangrui Song } 2263*207e5cccSFangrui Song 2264*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_s32_0( 2265*207e5cccSFangrui Song // CHECK-NEXT: entry: 2266*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2267*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2268*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 2269*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]] 2270*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL]] 2271*207e5cccSFangrui Song // 2272*207e5cccSFangrui Song int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) { 2273*207e5cccSFangrui Song return vmulq_laneq_s32(a, v, 0); 2274*207e5cccSFangrui Song } 2275*207e5cccSFangrui Song 2276*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_u16_0( 2277*207e5cccSFangrui Song // CHECK-NEXT: entry: 2278*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2279*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2280*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2281*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]] 2282*207e5cccSFangrui Song // 
CHECK-NEXT: ret <4 x i16> [[MUL]] 2283*207e5cccSFangrui Song // 2284*207e5cccSFangrui Song uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) { 2285*207e5cccSFangrui Song return vmul_laneq_u16(a, v, 0); 2286*207e5cccSFangrui Song } 2287*207e5cccSFangrui Song 2288*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_u16_0( 2289*207e5cccSFangrui Song // CHECK-NEXT: entry: 2290*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2291*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2292*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 2293*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]] 2294*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL]] 2295*207e5cccSFangrui Song // 2296*207e5cccSFangrui Song uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) { 2297*207e5cccSFangrui Song return vmulq_laneq_u16(a, v, 0); 2298*207e5cccSFangrui Song } 2299*207e5cccSFangrui Song 2300*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_u32_0( 2301*207e5cccSFangrui Song // CHECK-NEXT: entry: 2302*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2303*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2304*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2305*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]] 2306*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL]] 2307*207e5cccSFangrui Song // 2308*207e5cccSFangrui Song uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) { 2309*207e5cccSFangrui Song return vmul_laneq_u32(a, v, 0); 2310*207e5cccSFangrui Song } 2311*207e5cccSFangrui Song 2312*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmulq_laneq_u32_0( 2313*207e5cccSFangrui Song // CHECK-NEXT: entry: 2314*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2315*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2316*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 2317*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]] 2318*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL]] 2319*207e5cccSFangrui Song // 2320*207e5cccSFangrui Song uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) { 2321*207e5cccSFangrui Song return vmulq_laneq_u32(a, v, 0); 2322*207e5cccSFangrui Song } 2323*207e5cccSFangrui Song 2324*207e5cccSFangrui Song // CHECK-LABEL: @test_vfma_lane_f32_0( 2325*207e5cccSFangrui Song // CHECK-NEXT: entry: 2326*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 2327*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8> 2328*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 2329*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 2330*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 2331*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 2332*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2333*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 2334*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[FMLA2]] 2335*207e5cccSFangrui Song // 2336*207e5cccSFangrui Song float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, 
float32x2_t v) { 2337*207e5cccSFangrui Song return vfma_lane_f32(a, b, v, 0); 2338*207e5cccSFangrui Song } 2339*207e5cccSFangrui Song 2340*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_lane_f32_0( 2341*207e5cccSFangrui Song // CHECK-NEXT: entry: 2342*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 2343*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8> 2344*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 2345*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 2346*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer 2347*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 2348*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2349*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 2350*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[FMLA2]] 2351*207e5cccSFangrui Song // 2352*207e5cccSFangrui Song float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 2353*207e5cccSFangrui Song return vfmaq_lane_f32(a, b, v, 0); 2354*207e5cccSFangrui Song } 2355*207e5cccSFangrui Song 2356*207e5cccSFangrui Song // CHECK-LABEL: @test_vfma_laneq_f32_0( 2357*207e5cccSFangrui Song // CHECK-NEXT: entry: 2358*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 2359*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8> 2360*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 2361*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 
2362*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 2363*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 2364*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer 2365*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 2366*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[TMP6]] 2367*207e5cccSFangrui Song // 2368*207e5cccSFangrui Song float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 2369*207e5cccSFangrui Song return vfma_laneq_f32(a, b, v, 0); 2370*207e5cccSFangrui Song } 2371*207e5cccSFangrui Song 2372*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_laneq_f32_0( 2373*207e5cccSFangrui Song // CHECK-NEXT: entry: 2374*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 2375*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8> 2376*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 2377*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2378*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 2379*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 2380*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer 2381*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 2382*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[TMP6]] 2383*207e5cccSFangrui Song // 2384*207e5cccSFangrui Song float32x4_t test_vfmaq_laneq_f32_0(float32x4_t 
a, float32x4_t b, float32x4_t v) { 2385*207e5cccSFangrui Song return vfmaq_laneq_f32(a, b, v, 0); 2386*207e5cccSFangrui Song } 2387*207e5cccSFangrui Song 2388*207e5cccSFangrui Song // CHECK-LABEL: @test_vfms_lane_f32_0( 2389*207e5cccSFangrui Song // CHECK-NEXT: entry: 2390*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]] 2391*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 2392*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8> 2393*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 2394*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 2395*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 2396*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 2397*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2398*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]]) 2399*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[FMLA2]] 2400*207e5cccSFangrui Song // 2401*207e5cccSFangrui Song float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { 2402*207e5cccSFangrui Song return vfms_lane_f32(a, b, v, 0); 2403*207e5cccSFangrui Song } 2404*207e5cccSFangrui Song 2405*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_lane_f32_0( 2406*207e5cccSFangrui Song // CHECK-NEXT: entry: 2407*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]] 2408*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 2409*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8> 2410*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 2411*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> 2412*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer 2413*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 2414*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2415*207e5cccSFangrui Song // CHECK-NEXT: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]]) 2416*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[FMLA2]] 2417*207e5cccSFangrui Song // 2418*207e5cccSFangrui Song float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { 2419*207e5cccSFangrui Song return vfmsq_lane_f32(a, b, v, 0); 2420*207e5cccSFangrui Song } 2421*207e5cccSFangrui Song 2422*207e5cccSFangrui Song // CHECK-LABEL: @test_vfms_laneq_f32_0( 2423*207e5cccSFangrui Song // CHECK-NEXT: entry: 2424*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x float> [[B:%.*]] 2425*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 2426*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG]] to <8 x i8> 2427*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 2428*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 2429*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 2430*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 2431*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <2 x i32> zeroinitializer 2432*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x float> 
@llvm.fma.v2f32(<2 x float> [[LANE]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 2433*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[TMP6]] 2434*207e5cccSFangrui Song // 2435*207e5cccSFangrui Song float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { 2436*207e5cccSFangrui Song return vfms_laneq_f32(a, b, v, 0); 2437*207e5cccSFangrui Song } 2438*207e5cccSFangrui Song 2439*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_laneq_f32_0( 2440*207e5cccSFangrui Song // CHECK-NEXT: entry: 2441*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x float> [[B:%.*]] 2442*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 2443*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG]] to <16 x i8> 2444*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 2445*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 2446*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 2447*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> 2448*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP5]], <4 x i32> zeroinitializer 2449*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LANE]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 2450*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[TMP6]] 2451*207e5cccSFangrui Song // 2452*207e5cccSFangrui Song float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { 2453*207e5cccSFangrui Song return vfmsq_laneq_f32(a, b, v, 0); 2454*207e5cccSFangrui Song } 2455*207e5cccSFangrui Song 2456*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_laneq_f64_0( 2457*207e5cccSFangrui Song // CHECK-NEXT: entry: 2458*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <2 x double> [[A:%.*]] to <16 x i8> 2459*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> 2460*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 2461*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 2462*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 2463*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 2464*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer 2465*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 2466*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[TMP6]] 2467*207e5cccSFangrui Song // 2468*207e5cccSFangrui Song float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { 2469*207e5cccSFangrui Song return vfmaq_laneq_f64(a, b, v, 0); 2470*207e5cccSFangrui Song } 2471*207e5cccSFangrui Song 2472*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_laneq_f64_0( 2473*207e5cccSFangrui Song // CHECK-NEXT: entry: 2474*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[B:%.*]] 2475*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 2476*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[FNEG]] to <16 x i8> 2477*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 2478*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 2479*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 2480*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 
2481*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP5]], <2 x i32> zeroinitializer 2482*207e5cccSFangrui Song // CHECK-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LANE]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 2483*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[TMP6]] 2484*207e5cccSFangrui Song // 2485*207e5cccSFangrui Song float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { 2486*207e5cccSFangrui Song return vfmsq_laneq_f64(a, b, v, 0); 2487*207e5cccSFangrui Song } 2488*207e5cccSFangrui Song 2489*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_s16_0( 2490*207e5cccSFangrui Song // CHECK-NEXT: entry: 2491*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2492*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2493*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2494*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2495*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2496*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2497*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2498*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2499*207e5cccSFangrui Song // 2500*207e5cccSFangrui Song int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2501*207e5cccSFangrui Song return vmlal_lane_s16(a, b, v, 0); 2502*207e5cccSFangrui Song } 2503*207e5cccSFangrui Song 2504*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_s32_0( 2505*207e5cccSFangrui Song // CHECK-NEXT: entry: 2506*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast 
<2 x i32> [[V:%.*]] to <8 x i8> 2507*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2508*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2509*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2510*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2511*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2512*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2513*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2514*207e5cccSFangrui Song // 2515*207e5cccSFangrui Song int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2516*207e5cccSFangrui Song return vmlal_lane_s32(a, b, v, 0); 2517*207e5cccSFangrui Song } 2518*207e5cccSFangrui Song 2519*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_s16_0( 2520*207e5cccSFangrui Song // CHECK-NEXT: entry: 2521*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2522*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2523*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2524*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2525*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2526*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2527*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2528*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2529*207e5cccSFangrui Song // 
2530*207e5cccSFangrui Song int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2531*207e5cccSFangrui Song return vmlal_laneq_s16(a, b, v, 0); 2532*207e5cccSFangrui Song } 2533*207e5cccSFangrui Song 2534*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_s32_0( 2535*207e5cccSFangrui Song // CHECK-NEXT: entry: 2536*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2537*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2538*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2539*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2540*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2541*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2542*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2543*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2544*207e5cccSFangrui Song // 2545*207e5cccSFangrui Song int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2546*207e5cccSFangrui Song return vmlal_laneq_s32(a, b, v, 0); 2547*207e5cccSFangrui Song } 2548*207e5cccSFangrui Song 2549*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_s16_0( 2550*207e5cccSFangrui Song // CHECK-NEXT: entry: 2551*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2552*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2553*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2554*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x 
i32> zeroinitializer 2555*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2556*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2557*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2558*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2559*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2560*207e5cccSFangrui Song // 2561*207e5cccSFangrui Song int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2562*207e5cccSFangrui Song return vmlal_high_lane_s16(a, b, v, 0); 2563*207e5cccSFangrui Song } 2564*207e5cccSFangrui Song 2565*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_s32_0( 2566*207e5cccSFangrui Song // CHECK-NEXT: entry: 2567*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2568*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2569*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2570*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2571*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2572*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2573*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2574*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2575*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2576*207e5cccSFangrui Song // 2577*207e5cccSFangrui Song int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t 
b, int32x2_t v) { 2578*207e5cccSFangrui Song return vmlal_high_lane_s32(a, b, v, 0); 2579*207e5cccSFangrui Song } 2580*207e5cccSFangrui Song 2581*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_s16_0( 2582*207e5cccSFangrui Song // CHECK-NEXT: entry: 2583*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2584*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2585*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2586*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2587*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2588*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2589*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2590*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2591*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2592*207e5cccSFangrui Song // 2593*207e5cccSFangrui Song int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2594*207e5cccSFangrui Song return vmlal_high_laneq_s16(a, b, v, 0); 2595*207e5cccSFangrui Song } 2596*207e5cccSFangrui Song 2597*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_s32_0( 2598*207e5cccSFangrui Song // CHECK-NEXT: entry: 2599*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2600*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2601*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2602*207e5cccSFangrui Song 
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2603*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2604*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2605*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2606*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2607*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2608*207e5cccSFangrui Song // 2609*207e5cccSFangrui Song int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2610*207e5cccSFangrui Song return vmlal_high_laneq_s32(a, b, v, 0); 2611*207e5cccSFangrui Song } 2612*207e5cccSFangrui Song 2613*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_s16_0( 2614*207e5cccSFangrui Song // CHECK-NEXT: entry: 2615*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2616*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2617*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2618*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2619*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2620*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2621*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2622*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2623*207e5cccSFangrui Song // 2624*207e5cccSFangrui Song int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2625*207e5cccSFangrui Song return vmlsl_lane_s16(a, 
b, v, 0); 2626*207e5cccSFangrui Song } 2627*207e5cccSFangrui Song 2628*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_s32_0( 2629*207e5cccSFangrui Song // CHECK-NEXT: entry: 2630*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2631*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2632*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2633*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2634*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2635*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2636*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2637*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2638*207e5cccSFangrui Song // 2639*207e5cccSFangrui Song int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2640*207e5cccSFangrui Song return vmlsl_lane_s32(a, b, v, 0); 2641*207e5cccSFangrui Song } 2642*207e5cccSFangrui Song 2643*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_s16_0( 2644*207e5cccSFangrui Song // CHECK-NEXT: entry: 2645*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2646*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2647*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2648*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2649*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2650*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2651*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2652*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2653*207e5cccSFangrui Song // 2654*207e5cccSFangrui Song int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2655*207e5cccSFangrui Song return vmlsl_laneq_s16(a, b, v, 0); 2656*207e5cccSFangrui Song } 2657*207e5cccSFangrui Song 2658*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_s32_0( 2659*207e5cccSFangrui Song // CHECK-NEXT: entry: 2660*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2661*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2662*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2663*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2664*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2665*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2666*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2667*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2668*207e5cccSFangrui Song // 2669*207e5cccSFangrui Song int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2670*207e5cccSFangrui Song return vmlsl_laneq_s32(a, b, v, 0); 2671*207e5cccSFangrui Song } 2672*207e5cccSFangrui Song 2673*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_s16_0( 2674*207e5cccSFangrui Song // CHECK-NEXT: entry: 2675*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2676*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] 
= bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2677*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2678*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2679*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2680*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2681*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2682*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2683*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2684*207e5cccSFangrui Song // 2685*207e5cccSFangrui Song int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2686*207e5cccSFangrui Song return vmlsl_high_lane_s16(a, b, v, 0); 2687*207e5cccSFangrui Song } 2688*207e5cccSFangrui Song 2689*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_s32_0( 2690*207e5cccSFangrui Song // CHECK-NEXT: entry: 2691*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2692*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2693*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2694*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2695*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2696*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2697*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 
2698*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2699*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2700*207e5cccSFangrui Song // 2701*207e5cccSFangrui Song int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2702*207e5cccSFangrui Song return vmlsl_high_lane_s32(a, b, v, 0); 2703*207e5cccSFangrui Song } 2704*207e5cccSFangrui Song 2705*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_s16_0( 2706*207e5cccSFangrui Song // CHECK-NEXT: entry: 2707*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2708*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2709*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2710*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2711*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2712*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2713*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2714*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2715*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2716*207e5cccSFangrui Song // 2717*207e5cccSFangrui Song int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2718*207e5cccSFangrui Song return vmlsl_high_laneq_s16(a, b, v, 0); 2719*207e5cccSFangrui Song } 2720*207e5cccSFangrui Song 2721*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_s32_0( 2722*207e5cccSFangrui Song // CHECK-NEXT: entry: 2723*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> 
[[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2724*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2725*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2726*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2727*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2728*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2729*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2730*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2731*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2732*207e5cccSFangrui Song // 2733*207e5cccSFangrui Song int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2734*207e5cccSFangrui Song return vmlsl_high_laneq_s32(a, b, v, 0); 2735*207e5cccSFangrui Song } 2736*207e5cccSFangrui Song 2737*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_u16_0( 2738*207e5cccSFangrui Song // CHECK-NEXT: entry: 2739*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2740*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2741*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2742*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2743*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2744*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2745*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x 
i32> [[A:%.*]], [[VMULL2_I]] 2746*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2747*207e5cccSFangrui Song // 2748*207e5cccSFangrui Song int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2749*207e5cccSFangrui Song return vmlal_lane_u16(a, b, v, 0); 2750*207e5cccSFangrui Song } 2751*207e5cccSFangrui Song 2752*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_lane_u32_0( 2753*207e5cccSFangrui Song // CHECK-NEXT: entry: 2754*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2755*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2756*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2757*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2758*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2759*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2760*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2761*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2762*207e5cccSFangrui Song // 2763*207e5cccSFangrui Song int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2764*207e5cccSFangrui Song return vmlal_lane_u32(a, b, v, 0); 2765*207e5cccSFangrui Song } 2766*207e5cccSFangrui Song 2767*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_u16_0( 2768*207e5cccSFangrui Song // CHECK-NEXT: entry: 2769*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2770*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2771*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 
2772*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2773*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2774*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2775*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2776*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2777*207e5cccSFangrui Song // 2778*207e5cccSFangrui Song int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2779*207e5cccSFangrui Song return vmlal_laneq_u16(a, b, v, 0); 2780*207e5cccSFangrui Song } 2781*207e5cccSFangrui Song 2782*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_laneq_u32_0( 2783*207e5cccSFangrui Song // CHECK-NEXT: entry: 2784*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2785*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2786*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2787*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2788*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2789*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2790*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2791*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2792*207e5cccSFangrui Song // 2793*207e5cccSFangrui Song int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2794*207e5cccSFangrui Song return vmlal_laneq_u32(a, b, v, 0); 2795*207e5cccSFangrui Song } 2796*207e5cccSFangrui Song 2797*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmlal_high_lane_u16_0( 2798*207e5cccSFangrui Song // CHECK-NEXT: entry: 2799*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2800*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2801*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2802*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2803*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2804*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2805*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2806*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2807*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2808*207e5cccSFangrui Song // 2809*207e5cccSFangrui Song int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2810*207e5cccSFangrui Song return vmlal_high_lane_u16(a, b, v, 0); 2811*207e5cccSFangrui Song } 2812*207e5cccSFangrui Song 2813*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_lane_u32_0( 2814*207e5cccSFangrui Song // CHECK-NEXT: entry: 2815*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2816*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2817*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2818*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2819*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> 
[[SHUFFLE_I]] to <8 x i8> 2820*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2821*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2822*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2823*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2824*207e5cccSFangrui Song // 2825*207e5cccSFangrui Song int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2826*207e5cccSFangrui Song return vmlal_high_lane_u32(a, b, v, 0); 2827*207e5cccSFangrui Song } 2828*207e5cccSFangrui Song 2829*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_u16_0( 2830*207e5cccSFangrui Song // CHECK-NEXT: entry: 2831*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2832*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2833*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2834*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2835*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2836*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2837*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2838*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]] 2839*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 2840*207e5cccSFangrui Song // 2841*207e5cccSFangrui Song int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2842*207e5cccSFangrui Song return vmlal_high_laneq_u16(a, 
b, v, 0); 2843*207e5cccSFangrui Song } 2844*207e5cccSFangrui Song 2845*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_laneq_u32_0( 2846*207e5cccSFangrui Song // CHECK-NEXT: entry: 2847*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2848*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2849*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2850*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2851*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2852*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2853*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2854*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]] 2855*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD]] 2856*207e5cccSFangrui Song // 2857*207e5cccSFangrui Song int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2858*207e5cccSFangrui Song return vmlal_high_laneq_u32(a, b, v, 0); 2859*207e5cccSFangrui Song } 2860*207e5cccSFangrui Song 2861*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_u16_0( 2862*207e5cccSFangrui Song // CHECK-NEXT: entry: 2863*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2864*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2865*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2866*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2867*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2868*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2869*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2870*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2871*207e5cccSFangrui Song // 2872*207e5cccSFangrui Song int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 2873*207e5cccSFangrui Song return vmlsl_lane_u16(a, b, v, 0); 2874*207e5cccSFangrui Song } 2875*207e5cccSFangrui Song 2876*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_lane_u32_0( 2877*207e5cccSFangrui Song // CHECK-NEXT: entry: 2878*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2879*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 2880*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2881*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2882*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2883*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2884*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2885*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2886*207e5cccSFangrui Song // 2887*207e5cccSFangrui Song int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 2888*207e5cccSFangrui Song return vmlsl_lane_u32(a, b, v, 0); 2889*207e5cccSFangrui Song } 2890*207e5cccSFangrui Song 2891*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_u16_0( 2892*207e5cccSFangrui Song // CHECK-NEXT: entry: 2893*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> 
[[V:%.*]] to <16 x i8> 2894*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2895*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2896*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 2897*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2898*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 2899*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2900*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2901*207e5cccSFangrui Song // 2902*207e5cccSFangrui Song int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 2903*207e5cccSFangrui Song return vmlsl_laneq_u16(a, b, v, 0); 2904*207e5cccSFangrui Song } 2905*207e5cccSFangrui Song 2906*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_laneq_u32_0( 2907*207e5cccSFangrui Song // CHECK-NEXT: entry: 2908*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2909*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2910*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2911*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 2912*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2913*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 2914*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2915*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2916*207e5cccSFangrui Song // 2917*207e5cccSFangrui 
Song int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 2918*207e5cccSFangrui Song return vmlsl_laneq_u32(a, b, v, 0); 2919*207e5cccSFangrui Song } 2920*207e5cccSFangrui Song 2921*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_u16_0( 2922*207e5cccSFangrui Song // CHECK-NEXT: entry: 2923*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2924*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2925*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2926*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2927*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2928*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2929*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2930*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2931*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2932*207e5cccSFangrui Song // 2933*207e5cccSFangrui Song int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 2934*207e5cccSFangrui Song return vmlsl_high_lane_u16(a, b, v, 0); 2935*207e5cccSFangrui Song } 2936*207e5cccSFangrui Song 2937*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_lane_u32_0( 2938*207e5cccSFangrui Song // CHECK-NEXT: entry: 2939*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2940*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 2941*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> 
[[TMP0]] to <2 x i32> 2942*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 2943*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2944*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2945*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2946*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2947*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2948*207e5cccSFangrui Song // 2949*207e5cccSFangrui Song int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 2950*207e5cccSFangrui Song return vmlsl_high_lane_u32(a, b, v, 0); 2951*207e5cccSFangrui Song } 2952*207e5cccSFangrui Song 2953*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_u16_0( 2954*207e5cccSFangrui Song // CHECK-NEXT: entry: 2955*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 2956*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 2957*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 2958*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 2959*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 2960*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2961*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 2962*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]] 2963*207e5cccSFangrui 
Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 2964*207e5cccSFangrui Song // 2965*207e5cccSFangrui Song int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 2966*207e5cccSFangrui Song return vmlsl_high_laneq_u16(a, b, v, 0); 2967*207e5cccSFangrui Song } 2968*207e5cccSFangrui Song 2969*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_laneq_u32_0( 2970*207e5cccSFangrui Song // CHECK-NEXT: entry: 2971*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 2972*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 2973*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 2974*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 2975*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 2976*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 2977*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 2978*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]] 2979*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB]] 2980*207e5cccSFangrui Song // 2981*207e5cccSFangrui Song int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 2982*207e5cccSFangrui Song return vmlsl_high_laneq_u32(a, b, v, 0); 2983*207e5cccSFangrui Song } 2984*207e5cccSFangrui Song 2985*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_s16_0( 2986*207e5cccSFangrui Song // CHECK-NEXT: entry: 2987*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 2988*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 2989*207e5cccSFangrui Song 
// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 2990*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 2991*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 2992*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 2993*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 2994*207e5cccSFangrui Song // 2995*207e5cccSFangrui Song int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { 2996*207e5cccSFangrui Song return vmull_lane_s16(a, v, 0); 2997*207e5cccSFangrui Song } 2998*207e5cccSFangrui Song 2999*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_s32_0( 3000*207e5cccSFangrui Song // CHECK-NEXT: entry: 3001*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3002*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3003*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3004*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3005*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3006*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3007*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3008*207e5cccSFangrui Song // 3009*207e5cccSFangrui Song int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { 3010*207e5cccSFangrui Song return vmull_lane_s32(a, v, 0); 3011*207e5cccSFangrui Song } 3012*207e5cccSFangrui Song 3013*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_u16_0( 3014*207e5cccSFangrui Song // CHECK-NEXT: entry: 3015*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3016*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3017*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3018*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3019*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3020*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 3021*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3022*207e5cccSFangrui Song // 3023*207e5cccSFangrui Song uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { 3024*207e5cccSFangrui Song return vmull_lane_u16(a, v, 0); 3025*207e5cccSFangrui Song } 3026*207e5cccSFangrui Song 3027*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_lane_u32_0( 3028*207e5cccSFangrui Song // CHECK-NEXT: entry: 3029*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3030*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3031*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3032*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3033*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3034*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3035*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3036*207e5cccSFangrui Song // 3037*207e5cccSFangrui Song uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { 3038*207e5cccSFangrui Song return vmull_lane_u32(a, v, 0); 3039*207e5cccSFangrui Song } 
3040*207e5cccSFangrui Song 3041*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_s16_0( 3042*207e5cccSFangrui Song // CHECK-NEXT: entry: 3043*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3044*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3045*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3046*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3047*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3048*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3049*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3050*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3051*207e5cccSFangrui Song // 3052*207e5cccSFangrui Song int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { 3053*207e5cccSFangrui Song return vmull_high_lane_s16(a, v, 0); 3054*207e5cccSFangrui Song } 3055*207e5cccSFangrui Song 3056*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_s32_0( 3057*207e5cccSFangrui Song // CHECK-NEXT: entry: 3058*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3059*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3060*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3061*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3062*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 
3063*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3064*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3065*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3066*207e5cccSFangrui Song // 3067*207e5cccSFangrui Song int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { 3068*207e5cccSFangrui Song return vmull_high_lane_s32(a, v, 0); 3069*207e5cccSFangrui Song } 3070*207e5cccSFangrui Song 3071*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_u16_0( 3072*207e5cccSFangrui Song // CHECK-NEXT: entry: 3073*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3074*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3075*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3076*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3077*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3078*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3079*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3080*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3081*207e5cccSFangrui Song // 3082*207e5cccSFangrui Song uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { 3083*207e5cccSFangrui Song return vmull_high_lane_u16(a, v, 0); 3084*207e5cccSFangrui Song } 3085*207e5cccSFangrui Song 3086*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_lane_u32_0( 3087*207e5cccSFangrui Song // CHECK-NEXT: entry: 3088*207e5cccSFangrui Song // CHECK-NEXT: 
[[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3089*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3090*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3091*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3092*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3093*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3094*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3095*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3096*207e5cccSFangrui Song // 3097*207e5cccSFangrui Song uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { 3098*207e5cccSFangrui Song return vmull_high_lane_u32(a, v, 0); 3099*207e5cccSFangrui Song } 3100*207e5cccSFangrui Song 3101*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_s16_0( 3102*207e5cccSFangrui Song // CHECK-NEXT: entry: 3103*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3104*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3105*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3106*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3107*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3108*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 3109*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3110*207e5cccSFangrui Song // 3111*207e5cccSFangrui 
Song int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { 3112*207e5cccSFangrui Song return vmull_laneq_s16(a, v, 0); 3113*207e5cccSFangrui Song } 3114*207e5cccSFangrui Song 3115*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_s32_0( 3116*207e5cccSFangrui Song // CHECK-NEXT: entry: 3117*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3118*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3119*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3120*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3121*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3122*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3123*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3124*207e5cccSFangrui Song // 3125*207e5cccSFangrui Song int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { 3126*207e5cccSFangrui Song return vmull_laneq_s32(a, v, 0); 3127*207e5cccSFangrui Song } 3128*207e5cccSFangrui Song 3129*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_u16_0( 3130*207e5cccSFangrui Song // CHECK-NEXT: entry: 3131*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3132*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3133*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3134*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3135*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3136*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 3137*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3138*207e5cccSFangrui Song // 3139*207e5cccSFangrui Song uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { 3140*207e5cccSFangrui Song return vmull_laneq_u16(a, v, 0); 3141*207e5cccSFangrui Song } 3142*207e5cccSFangrui Song 3143*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_laneq_u32_0( 3144*207e5cccSFangrui Song // CHECK-NEXT: entry: 3145*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3146*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3147*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3148*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3149*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3150*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3151*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3152*207e5cccSFangrui Song // 3153*207e5cccSFangrui Song uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { 3154*207e5cccSFangrui Song return vmull_laneq_u32(a, v, 0); 3155*207e5cccSFangrui Song } 3156*207e5cccSFangrui Song 3157*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_s16_0( 3158*207e5cccSFangrui Song // CHECK-NEXT: entry: 3159*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3160*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3161*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3162*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = 
shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3163*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3164*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3165*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3166*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3167*207e5cccSFangrui Song // 3168*207e5cccSFangrui Song int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { 3169*207e5cccSFangrui Song return vmull_high_laneq_s16(a, v, 0); 3170*207e5cccSFangrui Song } 3171*207e5cccSFangrui Song 3172*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_s32_0( 3173*207e5cccSFangrui Song // CHECK-NEXT: entry: 3174*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3175*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3176*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3177*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3178*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3179*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3180*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3181*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3182*207e5cccSFangrui Song // 3183*207e5cccSFangrui Song int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { 3184*207e5cccSFangrui Song return vmull_high_laneq_s32(a, v, 0); 3185*207e5cccSFangrui Song } 3186*207e5cccSFangrui Song 
3187*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_u16_0( 3188*207e5cccSFangrui Song // CHECK-NEXT: entry: 3189*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3190*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3191*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3192*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3193*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3194*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3195*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3196*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 3197*207e5cccSFangrui Song // 3198*207e5cccSFangrui Song uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { 3199*207e5cccSFangrui Song return vmull_high_laneq_u16(a, v, 0); 3200*207e5cccSFangrui Song } 3201*207e5cccSFangrui Song 3202*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_laneq_u32_0( 3203*207e5cccSFangrui Song // CHECK-NEXT: entry: 3204*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3205*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3206*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3207*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3208*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3209*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3210*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3211*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 3212*207e5cccSFangrui Song // 3213*207e5cccSFangrui Song uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { 3214*207e5cccSFangrui Song return vmull_high_laneq_u32(a, v, 0); 3215*207e5cccSFangrui Song } 3216*207e5cccSFangrui Song 3217*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_lane_s16_0( 3218*207e5cccSFangrui Song // CHECK-NEXT: entry: 3219*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3220*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3221*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3222*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3223*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 3224*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3225*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 3226*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 3227*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 3228*207e5cccSFangrui Song // 3229*207e5cccSFangrui Song int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 3230*207e5cccSFangrui Song return vqdmlal_lane_s16(a, b, v, 0); 3231*207e5cccSFangrui Song } 3232*207e5cccSFangrui Song 3233*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_lane_s32_0( 3234*207e5cccSFangrui Song // 
CHECK-NEXT: entry: 3235*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3236*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3237*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3238*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 3239*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 3240*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3241*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 3242*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 3243*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 3244*207e5cccSFangrui Song // 3245*207e5cccSFangrui Song int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 3246*207e5cccSFangrui Song return vqdmlal_lane_s32(a, b, v, 0); 3247*207e5cccSFangrui Song } 3248*207e5cccSFangrui Song 3249*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_lane_s16_0( 3250*207e5cccSFangrui Song // CHECK-NEXT: entry: 3251*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3252*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3253*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3254*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3255*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3256*207e5cccSFangrui Song 
// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3257*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3258*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3259*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 3260*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 3261*207e5cccSFangrui Song // 3262*207e5cccSFangrui Song int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 3263*207e5cccSFangrui Song return vqdmlal_high_lane_s16(a, b, v, 0); 3264*207e5cccSFangrui Song } 3265*207e5cccSFangrui Song 3266*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_lane_s32_0( 3267*207e5cccSFangrui Song // CHECK-NEXT: entry: 3268*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3269*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3270*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3271*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3272*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 3273*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3274*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3275*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3276*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> 
[[VQDMLAL2_I]]) 3277*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 3278*207e5cccSFangrui Song // 3279*207e5cccSFangrui Song int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 3280*207e5cccSFangrui Song return vqdmlal_high_lane_s32(a, b, v, 0); 3281*207e5cccSFangrui Song } 3282*207e5cccSFangrui Song 3283*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_lane_s16_0( 3284*207e5cccSFangrui Song // CHECK-NEXT: entry: 3285*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3286*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3287*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3288*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3289*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 3290*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3291*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 3292*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 3293*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 3294*207e5cccSFangrui Song // 3295*207e5cccSFangrui Song int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { 3296*207e5cccSFangrui Song return vqdmlsl_lane_s16(a, b, v, 0); 3297*207e5cccSFangrui Song } 3298*207e5cccSFangrui Song 3299*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_lane_s32_0( 3300*207e5cccSFangrui Song // CHECK-NEXT: entry: 3301*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3302*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 
x i8> [[TMP0]] to <2 x i32> 3303*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3304*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 3305*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 3306*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3307*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 3308*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 3309*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 3310*207e5cccSFangrui Song // 3311*207e5cccSFangrui Song int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { 3312*207e5cccSFangrui Song return vqdmlsl_lane_s32(a, b, v, 0); 3313*207e5cccSFangrui Song } 3314*207e5cccSFangrui Song 3315*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_lane_s16_0( 3316*207e5cccSFangrui Song // CHECK-NEXT: entry: 3317*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3318*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3319*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3320*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3321*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3322*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3323*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 
3324*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3325*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 3326*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 3327*207e5cccSFangrui Song // 3328*207e5cccSFangrui Song int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { 3329*207e5cccSFangrui Song return vqdmlsl_high_lane_s16(a, b, v, 0); 3330*207e5cccSFangrui Song } 3331*207e5cccSFangrui Song 3332*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_lane_s32_0( 3333*207e5cccSFangrui Song // CHECK-NEXT: entry: 3334*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3335*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3336*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3337*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3338*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 3339*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3340*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3341*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3342*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 3343*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 3344*207e5cccSFangrui Song // 3345*207e5cccSFangrui Song int64x2_t 
test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { 3346*207e5cccSFangrui Song return vqdmlsl_high_lane_s32(a, b, v, 0); 3347*207e5cccSFangrui Song } 3348*207e5cccSFangrui Song 3349*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_lane_s16_0( 3350*207e5cccSFangrui Song // CHECK-NEXT: entry: 3351*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3352*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3353*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3354*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3355*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3356*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 3357*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 3358*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 3359*207e5cccSFangrui Song // 3360*207e5cccSFangrui Song int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { 3361*207e5cccSFangrui Song return vqdmull_lane_s16(a, v, 0); 3362*207e5cccSFangrui Song } 3363*207e5cccSFangrui Song 3364*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_lane_s32_0( 3365*207e5cccSFangrui Song // CHECK-NEXT: entry: 3366*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3367*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3368*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3369*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3370*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3371*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3372*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 3373*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 3374*207e5cccSFangrui Song // 3375*207e5cccSFangrui Song int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { 3376*207e5cccSFangrui Song return vqdmull_lane_s32(a, v, 0); 3377*207e5cccSFangrui Song } 3378*207e5cccSFangrui Song 3379*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_laneq_s16_0( 3380*207e5cccSFangrui Song // CHECK-NEXT: entry: 3381*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3382*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3383*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3384*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3385*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3386*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]]) 3387*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 3388*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 3389*207e5cccSFangrui Song // 3390*207e5cccSFangrui Song int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { 3391*207e5cccSFangrui Song return vqdmull_laneq_s16(a, v, 0); 3392*207e5cccSFangrui Song } 3393*207e5cccSFangrui Song 3394*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_laneq_s32_0( 3395*207e5cccSFangrui Song // CHECK-NEXT: entry: 3396*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3397*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3398*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3399*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3400*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3401*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]]) 3402*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 3403*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 3404*207e5cccSFangrui Song // 3405*207e5cccSFangrui Song int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { 3406*207e5cccSFangrui Song return vqdmull_laneq_s32(a, v, 0); 3407*207e5cccSFangrui Song } 3408*207e5cccSFangrui Song 3409*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_lane_s16_0( 3410*207e5cccSFangrui Song // CHECK-NEXT: entry: 3411*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3412*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3413*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3414*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 3415*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3416*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3417*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> 
[[SHUFFLE_I]], <4 x i16> [[LANE]]) 3418*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 3419*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 3420*207e5cccSFangrui Song // 3421*207e5cccSFangrui Song int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { 3422*207e5cccSFangrui Song return vqdmull_high_lane_s16(a, v, 0); 3423*207e5cccSFangrui Song } 3424*207e5cccSFangrui Song 3425*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_lane_s32_0( 3426*207e5cccSFangrui Song // CHECK-NEXT: entry: 3427*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3428*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3429*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3430*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 3431*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3432*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 3433*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3434*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 3435*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 3436*207e5cccSFangrui Song // 3437*207e5cccSFangrui Song int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { 3438*207e5cccSFangrui Song return vqdmull_high_lane_s32(a, v, 0); 3439*207e5cccSFangrui Song } 3440*207e5cccSFangrui Song 3441*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_laneq_s16_0( 3442*207e5cccSFangrui Song // CHECK-NEXT: entry: 3443*207e5cccSFangrui Song // 
CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3444*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 3445*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3446*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 3447*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 3448*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 3449*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 3450*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 3451*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I]] 3452*207e5cccSFangrui Song // 3453*207e5cccSFangrui Song int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { 3454*207e5cccSFangrui Song return vqdmull_high_laneq_s16(a, v, 0); 3455*207e5cccSFangrui Song } 3456*207e5cccSFangrui Song 3457*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_laneq_s32_0( 3458*207e5cccSFangrui Song // CHECK-NEXT: entry: 3459*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3460*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 3461*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3462*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 3463*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 3464*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = 
bitcast <2 x i32> [[LANE]] to <8 x i8> 3465*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 3466*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 3467*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I]] 3468*207e5cccSFangrui Song // 3469*207e5cccSFangrui Song int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { 3470*207e5cccSFangrui Song return vqdmull_high_laneq_s32(a, v, 0); 3471*207e5cccSFangrui Song } 3472*207e5cccSFangrui Song 3473*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_lane_s16_0( 3474*207e5cccSFangrui Song // CHECK-NEXT: entry: 3475*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3476*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3477*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3478*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3479*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.lane.v4i16.v4i16(<4 x i16> [[VQDMULH_LANE_V]], <4 x i16> [[VQDMULH_LANE_V1]], i32 0) 3480*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQDMULH_LANE_V2]] 3481*207e5cccSFangrui Song // 3482*207e5cccSFangrui Song int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { 3483*207e5cccSFangrui Song return vqdmulh_lane_s16(a, v, 0); 3484*207e5cccSFangrui Song } 3485*207e5cccSFangrui Song 3486*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_lane_s16_0( 3487*207e5cccSFangrui Song // CHECK-NEXT: entry: 3488*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 3489*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3490*207e5cccSFangrui 
Song // CHECK-NEXT: [[VQDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3491*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3492*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.lane.v8i16.v4i16(<8 x i16> [[VQDMULHQ_LANE_V]], <4 x i16> [[VQDMULHQ_LANE_V1]], i32 0) 3493*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_LANE_V2]] 3494*207e5cccSFangrui Song // 3495*207e5cccSFangrui Song int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { 3496*207e5cccSFangrui Song return vqdmulhq_lane_s16(a, v, 0); 3497*207e5cccSFangrui Song } 3498*207e5cccSFangrui Song 3499*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_lane_s32_0( 3500*207e5cccSFangrui Song // CHECK-NEXT: entry: 3501*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3502*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3503*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3504*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3505*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANE_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.lane.v2i32.v2i32(<2 x i32> [[VQDMULH_LANE_V]], <2 x i32> [[VQDMULH_LANE_V1]], i32 0) 3506*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQDMULH_LANE_V2]] 3507*207e5cccSFangrui Song // 3508*207e5cccSFangrui Song int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { 3509*207e5cccSFangrui Song return vqdmulh_lane_s32(a, v, 0); 3510*207e5cccSFangrui Song } 3511*207e5cccSFangrui Song 3512*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_lane_s32_0( 3513*207e5cccSFangrui Song // CHECK-NEXT: entry: 3514*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3515*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3516*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3517*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3518*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANE_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.lane.v4i32.v2i32(<4 x i32> [[VQDMULHQ_LANE_V]], <2 x i32> [[VQDMULHQ_LANE_V1]], i32 0) 3519*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_LANE_V2]] 3520*207e5cccSFangrui Song // 3521*207e5cccSFangrui Song int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { 3522*207e5cccSFangrui Song return vqdmulhq_lane_s32(a, v, 0); 3523*207e5cccSFangrui Song } 3524*207e5cccSFangrui Song 3525*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_lane_s16_0( 3526*207e5cccSFangrui Song // CHECK-NEXT: entry: 3527*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 3528*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3529*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 3530*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3531*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v4i16.v4i16(<4 x i16> [[VQRDMULH_LANE_V]], <4 x i16> [[VQRDMULH_LANE_V1]], i32 0) 3532*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMULH_LANE_V2]] 3533*207e5cccSFangrui Song // 3534*207e5cccSFangrui Song int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { 3535*207e5cccSFangrui Song return vqrdmulh_lane_s16(a, v, 0); 3536*207e5cccSFangrui Song } 3537*207e5cccSFangrui Song 3538*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_lane_s16_0( 3539*207e5cccSFangrui Song // CHECK-NEXT: entry: 3540*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 3541*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 3542*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 3543*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3544*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.lane.v8i16.v4i16(<8 x i16> [[VQRDMULHQ_LANE_V]], <4 x i16> [[VQRDMULHQ_LANE_V1]], i32 0) 3545*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_LANE_V2]] 3546*207e5cccSFangrui Song // 3547*207e5cccSFangrui Song int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { 3548*207e5cccSFangrui Song return vqrdmulhq_lane_s16(a, v, 0); 3549*207e5cccSFangrui Song } 3550*207e5cccSFangrui Song 3551*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_lane_s32_0( 3552*207e5cccSFangrui Song // CHECK-NEXT: entry: 3553*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 3554*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3555*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 3556*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3557*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANE_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v2i32.v2i32(<2 x i32> [[VQRDMULH_LANE_V]], <2 x i32> [[VQRDMULH_LANE_V1]], i32 0) 3558*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMULH_LANE_V2]] 3559*207e5cccSFangrui Song // 3560*207e5cccSFangrui Song int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { 3561*207e5cccSFangrui Song return vqrdmulh_lane_s32(a, v, 0); 3562*207e5cccSFangrui Song } 3563*207e5cccSFangrui Song 3564*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vqrdmulhq_lane_s32_0( 3565*207e5cccSFangrui Song // CHECK-NEXT: entry: 3566*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3567*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 3568*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 3569*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3570*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANE_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.lane.v4i32.v2i32(<4 x i32> [[VQRDMULHQ_LANE_V]], <2 x i32> [[VQRDMULHQ_LANE_V1]], i32 0) 3571*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMULHQ_LANE_V2]] 3572*207e5cccSFangrui Song // 3573*207e5cccSFangrui Song int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { 3574*207e5cccSFangrui Song return vqrdmulhq_lane_s32(a, v, 0); 3575*207e5cccSFangrui Song } 3576*207e5cccSFangrui Song 3577*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_lane_f32_0( 3578*207e5cccSFangrui Song // CHECK-NEXT: entry: 3579*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 3580*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3581*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer 3582*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]] 3583*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[MUL]] 3584*207e5cccSFangrui Song // 3585*207e5cccSFangrui Song float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { 3586*207e5cccSFangrui Song return vmul_lane_f32(a, v, 0); 3587*207e5cccSFangrui Song } 3588*207e5cccSFangrui Song 3589*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_lane_f32_0( 3590*207e5cccSFangrui Song // CHECK-NEXT: entry: 3591*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 3592*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3593*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer 3594*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]] 3595*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[MUL]] 3596*207e5cccSFangrui Song // 3597*207e5cccSFangrui Song float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { 3598*207e5cccSFangrui Song return vmulq_lane_f32(a, v, 0); 3599*207e5cccSFangrui Song } 3600*207e5cccSFangrui Song 3601*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_f32_0( 3602*207e5cccSFangrui Song // CHECK-NEXT: entry: 3603*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 3604*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3605*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer 3606*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]] 3607*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[MUL]] 3608*207e5cccSFangrui Song // 3609*207e5cccSFangrui Song float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { 3610*207e5cccSFangrui Song return vmul_laneq_f32(a, v, 0); 3611*207e5cccSFangrui Song } 3612*207e5cccSFangrui Song 3613*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_laneq_f64_0( 3614*207e5cccSFangrui Song // CHECK-NEXT: entry: 3615*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8> 3616*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 3617*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to double 3618*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 3619*207e5cccSFangrui Song // CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 3620*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP2]], [[EXTRACT]] 3621*207e5cccSFangrui Song // CHECK-NEXT: [[TMP5:%.*]] = bitcast double [[TMP4]] to <1 x double> 3622*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> [[TMP5]] 3623*207e5cccSFangrui Song // 3624*207e5cccSFangrui Song float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { 3625*207e5cccSFangrui Song return vmul_laneq_f64(a, v, 0); 3626*207e5cccSFangrui Song } 3627*207e5cccSFangrui Song 3628*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_f32_0( 3629*207e5cccSFangrui Song // CHECK-NEXT: entry: 3630*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 3631*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3632*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer 3633*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]] 3634*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[MUL]] 3635*207e5cccSFangrui Song // 3636*207e5cccSFangrui Song float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { 3637*207e5cccSFangrui Song return vmulq_laneq_f32(a, v, 0); 3638*207e5cccSFangrui Song } 3639*207e5cccSFangrui Song 3640*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_laneq_f64_0( 3641*207e5cccSFangrui Song // CHECK-NEXT: entry: 3642*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 3643*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 3644*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer 
3645*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = fmul <2 x double> [[A:%.*]], [[LANE]] 3646*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[MUL]] 3647*207e5cccSFangrui Song // 3648*207e5cccSFangrui Song float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { 3649*207e5cccSFangrui Song return vmulq_laneq_f64(a, v, 0); 3650*207e5cccSFangrui Song } 3651*207e5cccSFangrui Song 3652*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulx_lane_f32_0( 3653*207e5cccSFangrui Song // CHECK-NEXT: entry: 3654*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 3655*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3656*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> zeroinitializer 3657*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 3658*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8> 3659*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]]) 3660*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VMULX2_I]] 3661*207e5cccSFangrui Song // 3662*207e5cccSFangrui Song float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { 3663*207e5cccSFangrui Song return vmulx_lane_f32(a, v, 0); 3664*207e5cccSFangrui Song } 3665*207e5cccSFangrui Song 3666*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_lane_f32_0( 3667*207e5cccSFangrui Song // CHECK-NEXT: entry: 3668*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8> 3669*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 3670*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> zeroinitializer 3671*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 3672*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8> 3673*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]]) 3674*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VMULX2_I]] 3675*207e5cccSFangrui Song // 3676*207e5cccSFangrui Song float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { 3677*207e5cccSFangrui Song return vmulxq_lane_f32(a, v, 0); 3678*207e5cccSFangrui Song } 3679*207e5cccSFangrui Song 3680*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_lane_f64_0( 3681*207e5cccSFangrui Song // CHECK-NEXT: entry: 3682*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[V:%.*]] to <8 x i8> 3683*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 3684*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <1 x double> [[TMP1]], <1 x double> [[TMP1]], <2 x i32> zeroinitializer 3685*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 3686*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8> 3687*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]]) 3688*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[VMULX2_I]] 3689*207e5cccSFangrui Song // 3690*207e5cccSFangrui Song float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { 3691*207e5cccSFangrui Song return vmulxq_lane_f64(a, v, 0); 3692*207e5cccSFangrui Song } 3693*207e5cccSFangrui Song 3694*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulx_laneq_f32_0( 3695*207e5cccSFangrui Song // CHECK-NEXT: entry: 3696*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 3697*207e5cccSFangrui Song 
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3698*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <2 x i32> zeroinitializer 3699*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 3700*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LANE]] to <8 x i8> 3701*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[LANE]]) 3702*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[VMULX2_I]] 3703*207e5cccSFangrui Song // 3704*207e5cccSFangrui Song float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { 3705*207e5cccSFangrui Song return vmulx_laneq_f32(a, v, 0); 3706*207e5cccSFangrui Song } 3707*207e5cccSFangrui Song 3708*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_laneq_f32_0( 3709*207e5cccSFangrui Song // CHECK-NEXT: entry: 3710*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[V:%.*]] to <16 x i8> 3711*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 3712*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP1]], <4 x i32> zeroinitializer 3713*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 3714*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[LANE]] to <16 x i8> 3715*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[LANE]]) 3716*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[VMULX2_I]] 3717*207e5cccSFangrui Song // 3718*207e5cccSFangrui Song float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { 3719*207e5cccSFangrui Song return vmulxq_laneq_f32(a, v, 0); 3720*207e5cccSFangrui Song } 3721*207e5cccSFangrui Song 
3722*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulxq_laneq_f64_0( 3723*207e5cccSFangrui Song // CHECK-NEXT: entry: 3724*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[V:%.*]] to <16 x i8> 3725*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 3726*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> [[TMP1]], <2 x i32> zeroinitializer 3727*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[A:%.*]] to <16 x i8> 3728*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[LANE]] to <16 x i8> 3729*207e5cccSFangrui Song // CHECK-NEXT: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[LANE]]) 3730*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[VMULX2_I]] 3731*207e5cccSFangrui Song // 3732*207e5cccSFangrui Song float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { 3733*207e5cccSFangrui Song return vmulxq_laneq_f64(a, v, 0); 3734*207e5cccSFangrui Song } 3735*207e5cccSFangrui Song 3736*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_n_s16( 3737*207e5cccSFangrui Song // CHECK-NEXT: entry: 3738*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3739*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 3740*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 3741*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 3742*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 3743*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3744*207e5cccSFangrui 
Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3745*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3746*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 3747*207e5cccSFangrui Song // 3748*207e5cccSFangrui Song int32x4_t test_vmull_high_n_s16(int16x8_t a, int16_t b) { 3749*207e5cccSFangrui Song return vmull_high_n_s16(a, b); 3750*207e5cccSFangrui Song } 3751*207e5cccSFangrui Song 3752*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_n_s32( 3753*207e5cccSFangrui Song // CHECK-NEXT: entry: 3754*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3755*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 3756*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 3757*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3758*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3759*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3760*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 3761*207e5cccSFangrui Song // 3762*207e5cccSFangrui Song int64x2_t test_vmull_high_n_s32(int32x4_t a, int32_t b) { 3763*207e5cccSFangrui Song return vmull_high_n_s32(a, b); 3764*207e5cccSFangrui Song } 3765*207e5cccSFangrui Song 3766*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_n_u16( 3767*207e5cccSFangrui Song // CHECK-NEXT: entry: 3768*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3769*207e5cccSFangrui Song 
// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 3770*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 3771*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 3772*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 3773*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3774*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3775*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3776*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 3777*207e5cccSFangrui Song // 3778*207e5cccSFangrui Song uint32x4_t test_vmull_high_n_u16(uint16x8_t a, uint16_t b) { 3779*207e5cccSFangrui Song return vmull_high_n_u16(a, b); 3780*207e5cccSFangrui Song } 3781*207e5cccSFangrui Song 3782*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_high_n_u32( 3783*207e5cccSFangrui Song // CHECK-NEXT: entry: 3784*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3785*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 3786*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 3787*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3788*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3789*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3790*207e5cccSFangrui 
Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 3791*207e5cccSFangrui Song // 3792*207e5cccSFangrui Song uint64x2_t test_vmull_high_n_u32(uint32x4_t a, uint32_t b) { 3793*207e5cccSFangrui Song return vmull_high_n_u32(a, b); 3794*207e5cccSFangrui Song } 3795*207e5cccSFangrui Song 3796*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_n_s16( 3797*207e5cccSFangrui Song // CHECK-NEXT: entry: 3798*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3799*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 3800*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 3801*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 3802*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 3803*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3804*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3805*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3806*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> 3807*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I_I]] 3808*207e5cccSFangrui Song // 3809*207e5cccSFangrui Song int32x4_t test_vqdmull_high_n_s16(int16x8_t a, int16_t b) { 3810*207e5cccSFangrui Song return vqdmull_high_n_s16(a, b); 3811*207e5cccSFangrui Song } 3812*207e5cccSFangrui Song 3813*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_high_n_s32( 3814*207e5cccSFangrui Song // CHECK-NEXT: entry: 3815*207e5cccSFangrui Song // CHECK-NEXT: 
[[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[A]], <2 x i32> <i32 2, i32 3> 3816*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 3817*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 3818*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3819*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3820*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3821*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> 3822*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I_I]] 3823*207e5cccSFangrui Song // 3824*207e5cccSFangrui Song int64x2_t test_vqdmull_high_n_s32(int32x4_t a, int32_t b) { 3825*207e5cccSFangrui Song return vqdmull_high_n_s32(a, b); 3826*207e5cccSFangrui Song } 3827*207e5cccSFangrui Song 3828*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_n_s16( 3829*207e5cccSFangrui Song // CHECK-NEXT: entry: 3830*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3831*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3832*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 3833*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3834*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3835*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 
3836*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3837*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3838*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 3839*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 3840*207e5cccSFangrui Song // 3841*207e5cccSFangrui Song int32x4_t test_vmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3842*207e5cccSFangrui Song return vmlal_high_n_s16(a, b, c); 3843*207e5cccSFangrui Song } 3844*207e5cccSFangrui Song 3845*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_n_s32( 3846*207e5cccSFangrui Song // CHECK-NEXT: entry: 3847*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3848*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 3849*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 3850*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3851*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3852*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3853*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 3854*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD_I]] 3855*207e5cccSFangrui Song // 3856*207e5cccSFangrui Song int64x2_t test_vmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3857*207e5cccSFangrui Song return vmlal_high_n_s32(a, b, c); 3858*207e5cccSFangrui Song } 3859*207e5cccSFangrui Song 3860*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmlal_high_n_u16( 3861*207e5cccSFangrui Song // CHECK-NEXT: entry: 3862*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3863*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3864*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 3865*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3866*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3867*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3868*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3869*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3870*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 3871*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 3872*207e5cccSFangrui Song // 3873*207e5cccSFangrui Song uint32x4_t test_vmlal_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { 3874*207e5cccSFangrui Song return vmlal_high_n_u16(a, b, c); 3875*207e5cccSFangrui Song } 3876*207e5cccSFangrui Song 3877*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_high_n_u32( 3878*207e5cccSFangrui Song // CHECK-NEXT: entry: 3879*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3880*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 3881*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 3882*207e5cccSFangrui Song 
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3883*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3884*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3885*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 3886*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD_I]] 3887*207e5cccSFangrui Song // 3888*207e5cccSFangrui Song uint64x2_t test_vmlal_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { 3889*207e5cccSFangrui Song return vmlal_high_n_u32(a, b, c); 3890*207e5cccSFangrui Song } 3891*207e5cccSFangrui Song 3892*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_n_s16( 3893*207e5cccSFangrui Song // CHECK-NEXT: entry: 3894*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3895*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3896*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 3897*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3898*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3899*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3900*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3901*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3902*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3903*207e5cccSFangrui Song // 
CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]]) 3904*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]] 3905*207e5cccSFangrui Song // 3906*207e5cccSFangrui Song int32x4_t test_vqdmlal_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3907*207e5cccSFangrui Song return vqdmlal_high_n_s16(a, b, c); 3908*207e5cccSFangrui Song } 3909*207e5cccSFangrui Song 3910*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_n_s32( 3911*207e5cccSFangrui Song // CHECK-NEXT: entry: 3912*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3913*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 3914*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 3915*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 3916*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3917*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3918*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3919*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]]) 3920*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]] 3921*207e5cccSFangrui Song // 3922*207e5cccSFangrui Song int64x2_t test_vqdmlal_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3923*207e5cccSFangrui Song return vqdmlal_high_n_s32(a, b, c); 3924*207e5cccSFangrui Song } 3925*207e5cccSFangrui Song 3926*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_n_s16( 3927*207e5cccSFangrui Song // 
CHECK-NEXT: entry: 3928*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3929*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3930*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 3931*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3932*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3933*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3934*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3935*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3936*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 3937*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 3938*207e5cccSFangrui Song // 3939*207e5cccSFangrui Song int32x4_t test_vmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 3940*207e5cccSFangrui Song return vmlsl_high_n_s16(a, b, c); 3941*207e5cccSFangrui Song } 3942*207e5cccSFangrui Song 3943*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_n_s32( 3944*207e5cccSFangrui Song // CHECK-NEXT: entry: 3945*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3946*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 3947*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 3948*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> 
[[SHUFFLE_I_I]] to <8 x i8> 3949*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3950*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3951*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 3952*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB_I]] 3953*207e5cccSFangrui Song // 3954*207e5cccSFangrui Song int64x2_t test_vmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 3955*207e5cccSFangrui Song return vmlsl_high_n_s32(a, b, c); 3956*207e5cccSFangrui Song } 3957*207e5cccSFangrui Song 3958*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_n_u16( 3959*207e5cccSFangrui Song // CHECK-NEXT: entry: 3960*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3961*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3962*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 3963*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3964*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3965*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3966*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 3967*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 3968*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 3969*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 3970*207e5cccSFangrui 
Song // 3971*207e5cccSFangrui Song uint32x4_t test_vmlsl_high_n_u16(uint32x4_t a, uint16x8_t b, uint16_t c) { 3972*207e5cccSFangrui Song return vmlsl_high_n_u16(a, b, c); 3973*207e5cccSFangrui Song } 3974*207e5cccSFangrui Song 3975*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_high_n_u32( 3976*207e5cccSFangrui Song // CHECK-NEXT: entry: 3977*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 3978*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 3979*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 3980*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 3981*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 3982*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 3983*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 3984*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB_I]] 3985*207e5cccSFangrui Song // 3986*207e5cccSFangrui Song uint64x2_t test_vmlsl_high_n_u32(uint64x2_t a, uint32x4_t b, uint32_t c) { 3987*207e5cccSFangrui Song return vmlsl_high_n_u32(a, b, c); 3988*207e5cccSFangrui Song } 3989*207e5cccSFangrui Song 3990*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_n_s16( 3991*207e5cccSFangrui Song // CHECK-NEXT: entry: 3992*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 3993*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 3994*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], 
i32 1 3995*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 3996*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 3997*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 3998*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 3999*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4000*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[VECINIT3_I]]) 4001*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]]) 4002*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]] 4003*207e5cccSFangrui Song // 4004*207e5cccSFangrui Song int32x4_t test_vqdmlsl_high_n_s16(int32x4_t a, int16x8_t b, int16_t c) { 4005*207e5cccSFangrui Song return vqdmlsl_high_n_s16(a, b, c); 4006*207e5cccSFangrui Song } 4007*207e5cccSFangrui Song 4008*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_n_s32( 4009*207e5cccSFangrui Song // CHECK-NEXT: entry: 4010*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 4011*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4012*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4013*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 4014*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 4015*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 
4016*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[VECINIT1_I]]) 4017*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]]) 4018*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]] 4019*207e5cccSFangrui Song // 4020*207e5cccSFangrui Song int64x2_t test_vqdmlsl_high_n_s32(int64x2_t a, int32x4_t b, int32_t c) { 4021*207e5cccSFangrui Song return vqdmlsl_high_n_s32(a, b, c); 4022*207e5cccSFangrui Song } 4023*207e5cccSFangrui Song 4024*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_n_f32( 4025*207e5cccSFangrui Song // CHECK-NEXT: entry: 4026*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0 4027*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[B]], i32 1 4028*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[A:%.*]], [[VECINIT1_I]] 4029*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[MUL_I]] 4030*207e5cccSFangrui Song // 4031*207e5cccSFangrui Song float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { 4032*207e5cccSFangrui Song return vmul_n_f32(a, b); 4033*207e5cccSFangrui Song } 4034*207e5cccSFangrui Song 4035*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_f32( 4036*207e5cccSFangrui Song // CHECK-NEXT: entry: 4037*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i32 0 4038*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[B]], i32 1 4039*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[B]], i32 2 4040*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[B]], i32 3 
4041*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[A:%.*]], [[VECINIT3_I]] 4042*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[MUL_I]] 4043*207e5cccSFangrui Song // 4044*207e5cccSFangrui Song float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { 4045*207e5cccSFangrui Song return vmulq_n_f32(a, b); 4046*207e5cccSFangrui Song } 4047*207e5cccSFangrui Song 4048*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_f64( 4049*207e5cccSFangrui Song // CHECK-NEXT: entry: 4050*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[B:%.*]], i32 0 4051*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[B]], i32 1 4052*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[A:%.*]], [[VECINIT1_I]] 4053*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[MUL_I]] 4054*207e5cccSFangrui Song // 4055*207e5cccSFangrui Song float64x2_t test_vmulq_n_f64(float64x2_t a, float64_t b) { 4056*207e5cccSFangrui Song return vmulq_n_f64(a, b); 4057*207e5cccSFangrui Song } 4058*207e5cccSFangrui Song 4059*207e5cccSFangrui Song // CHECK-LABEL: @test_vfma_n_f32( 4060*207e5cccSFangrui Song // CHECK-NEXT: entry: 4061*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[N:%.*]], i32 0 4062*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[N]], i32 1 4063*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 4064*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8> 4065*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> 4066*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[B]], <2 x float> [[VECINIT1_I]], <2 x float> [[A]]) 4067*207e5cccSFangrui Song // CHECK-NEXT: 
ret <2 x float> [[TMP3]] 4068*207e5cccSFangrui Song // 4069*207e5cccSFangrui Song float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) { 4070*207e5cccSFangrui Song return vfma_n_f32(a, b, n); 4071*207e5cccSFangrui Song } 4072*207e5cccSFangrui Song 4073*207e5cccSFangrui Song // CHECK-LABEL: @test_vfma_n_f64( 4074*207e5cccSFangrui Song // CHECK-NEXT: entry: 4075*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x double> poison, double [[N:%.*]], i32 0 4076*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8> 4077*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8> 4078*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> 4079*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[B]], <1 x double> [[VECINIT_I]], <1 x double> [[A]]) 4080*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> [[TMP3]] 4081*207e5cccSFangrui Song // 4082*207e5cccSFangrui Song float64x1_t test_vfma_n_f64(float64x1_t a, float64x1_t b, float64_t n) { 4083*207e5cccSFangrui Song return vfma_n_f64(a, b, n); 4084*207e5cccSFangrui Song } 4085*207e5cccSFangrui Song 4086*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmaq_n_f32( 4087*207e5cccSFangrui Song // CHECK-NEXT: entry: 4088*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[N:%.*]], i32 0 4089*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[N]], i32 1 4090*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[N]], i32 2 4091*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[N]], i32 3 4092*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 4093*207e5cccSFangrui Song // 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <16 x i8> 4094*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> 4095*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[VECINIT3_I]], <4 x float> [[A]]) 4096*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[TMP3]] 4097*207e5cccSFangrui Song // 4098*207e5cccSFangrui Song float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { 4099*207e5cccSFangrui Song return vfmaq_n_f32(a, b, n); 4100*207e5cccSFangrui Song } 4101*207e5cccSFangrui Song 4102*207e5cccSFangrui Song // CHECK-LABEL: @test_vfms_n_f32( 4103*207e5cccSFangrui Song // CHECK-NEXT: entry: 4104*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <2 x float> [[B:%.*]] 4105*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> poison, float [[N:%.*]], i32 0 4106*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[N]], i32 1 4107*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8> 4108*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[FNEG_I]] to <8 x i8> 4109*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> 4110*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FNEG_I]], <2 x float> [[VECINIT1_I]], <2 x float> [[A]]) 4111*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x float> [[TMP3]] 4112*207e5cccSFangrui Song // 4113*207e5cccSFangrui Song float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { 4114*207e5cccSFangrui Song return vfms_n_f32(a, b, n); 4115*207e5cccSFangrui Song } 4116*207e5cccSFangrui Song 4117*207e5cccSFangrui Song // CHECK-LABEL: @test_vfms_n_f64( 4118*207e5cccSFangrui Song // CHECK-NEXT: entry: 4119*207e5cccSFangrui Song // 
CHECK-NEXT: [[FNEG_I:%.*]] = fneg <1 x double> [[B:%.*]] 4120*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <1 x double> poison, double [[N:%.*]], i32 0 4121*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[A:%.*]] to <8 x i8> 4122*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x double> [[FNEG_I]] to <8 x i8> 4123*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x double> [[VECINIT_I]] to <8 x i8> 4124*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FNEG_I]], <1 x double> [[VECINIT_I]], <1 x double> [[A]]) 4125*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> [[TMP3]] 4126*207e5cccSFangrui Song // 4127*207e5cccSFangrui Song float64x1_t test_vfms_n_f64(float64x1_t a, float64x1_t b, float64_t n) { 4128*207e5cccSFangrui Song return vfms_n_f64(a, b, n); 4129*207e5cccSFangrui Song } 4130*207e5cccSFangrui Song 4131*207e5cccSFangrui Song // CHECK-LABEL: @test_vfmsq_n_f32( 4132*207e5cccSFangrui Song // CHECK-NEXT: entry: 4133*207e5cccSFangrui Song // CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x float> [[B:%.*]] 4134*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[N:%.*]], i32 0 4135*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[N]], i32 1 4136*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[N]], i32 2 4137*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[N]], i32 3 4138*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8> 4139*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <16 x i8> 4140*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <16 x i8> 4141*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FNEG_I]], <4 x float> [[VECINIT3_I]], <4 x float> [[A]]) 4142*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x float> [[TMP3]] 4143*207e5cccSFangrui Song // 4144*207e5cccSFangrui Song float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { 4145*207e5cccSFangrui Song return vfmsq_n_f32(a, b, n); 4146*207e5cccSFangrui Song } 4147*207e5cccSFangrui Song 4148*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_n_s16( 4149*207e5cccSFangrui Song // CHECK-NEXT: entry: 4150*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4151*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4152*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4153*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4154*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A:%.*]], [[VECINIT3_I]] 4155*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[MUL_I]] 4156*207e5cccSFangrui Song // 4157*207e5cccSFangrui Song int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { 4158*207e5cccSFangrui Song return vmul_n_s16(a, b); 4159*207e5cccSFangrui Song } 4160*207e5cccSFangrui Song 4161*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_s16( 4162*207e5cccSFangrui Song // CHECK-NEXT: entry: 4163*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 4164*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4165*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4166*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 
4167*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4 4168*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5 4169*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6 4170*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7 4171*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A:%.*]], [[VECINIT7_I]] 4172*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL_I]] 4173*207e5cccSFangrui Song // 4174*207e5cccSFangrui Song int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { 4175*207e5cccSFangrui Song return vmulq_n_s16(a, b); 4176*207e5cccSFangrui Song } 4177*207e5cccSFangrui Song 4178*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_n_s32( 4179*207e5cccSFangrui Song // CHECK-NEXT: entry: 4180*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4181*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4182*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A:%.*]], [[VECINIT1_I]] 4183*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL_I]] 4184*207e5cccSFangrui Song // 4185*207e5cccSFangrui Song int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { 4186*207e5cccSFangrui Song return vmul_n_s32(a, b); 4187*207e5cccSFangrui Song } 4188*207e5cccSFangrui Song 4189*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_s32( 4190*207e5cccSFangrui Song // CHECK-NEXT: entry: 4191*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 4192*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4193*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = 
insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2 4194*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3 4195*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A:%.*]], [[VECINIT3_I]] 4196*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL_I]] 4197*207e5cccSFangrui Song // 4198*207e5cccSFangrui Song int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { 4199*207e5cccSFangrui Song return vmulq_n_s32(a, b); 4200*207e5cccSFangrui Song } 4201*207e5cccSFangrui Song 4202*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_n_u16( 4203*207e5cccSFangrui Song // CHECK-NEXT: entry: 4204*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4205*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4206*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4207*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4208*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[A:%.*]], [[VECINIT3_I]] 4209*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[MUL_I]] 4210*207e5cccSFangrui Song // 4211*207e5cccSFangrui Song uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { 4212*207e5cccSFangrui Song return vmul_n_u16(a, b); 4213*207e5cccSFangrui Song } 4214*207e5cccSFangrui Song 4215*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_u16( 4216*207e5cccSFangrui Song // CHECK-NEXT: entry: 4217*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 4218*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4219*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 
4220*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4221*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4 4222*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5 4223*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6 4224*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7 4225*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[A:%.*]], [[VECINIT7_I]] 4226*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[MUL_I]] 4227*207e5cccSFangrui Song // 4228*207e5cccSFangrui Song uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { 4229*207e5cccSFangrui Song return vmulq_n_u16(a, b); 4230*207e5cccSFangrui Song } 4231*207e5cccSFangrui Song 4232*207e5cccSFangrui Song // CHECK-LABEL: @test_vmul_n_u32( 4233*207e5cccSFangrui Song // CHECK-NEXT: entry: 4234*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4235*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4236*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[A:%.*]], [[VECINIT1_I]] 4237*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[MUL_I]] 4238*207e5cccSFangrui Song // 4239*207e5cccSFangrui Song uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { 4240*207e5cccSFangrui Song return vmul_n_u32(a, b); 4241*207e5cccSFangrui Song } 4242*207e5cccSFangrui Song 4243*207e5cccSFangrui Song // CHECK-LABEL: @test_vmulq_n_u32( 4244*207e5cccSFangrui Song // CHECK-NEXT: entry: 4245*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 4246*207e5cccSFangrui Song // CHECK-NEXT: 
[[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4247*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2 4248*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3 4249*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[A:%.*]], [[VECINIT3_I]] 4250*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[MUL_I]] 4251*207e5cccSFangrui Song // 4252*207e5cccSFangrui Song uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { 4253*207e5cccSFangrui Song return vmulq_n_u32(a, b); 4254*207e5cccSFangrui Song } 4255*207e5cccSFangrui Song 4256*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_n_s16( 4257*207e5cccSFangrui Song // CHECK-NEXT: entry: 4258*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4259*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4260*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4261*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4262*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 4263*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4264*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]]) 4265*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 4266*207e5cccSFangrui Song // 4267*207e5cccSFangrui Song int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { 4268*207e5cccSFangrui Song return vmull_n_s16(a, b); 4269*207e5cccSFangrui Song } 4270*207e5cccSFangrui Song 4271*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_n_s32( 
4272*207e5cccSFangrui Song // CHECK-NEXT: entry: 4273*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4274*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4275*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 4276*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4277*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]]) 4278*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 4279*207e5cccSFangrui Song // 4280*207e5cccSFangrui Song int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { 4281*207e5cccSFangrui Song return vmull_n_s32(a, b); 4282*207e5cccSFangrui Song } 4283*207e5cccSFangrui Song 4284*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_n_u16( 4285*207e5cccSFangrui Song // CHECK-NEXT: entry: 4286*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4287*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4288*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4289*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4290*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 4291*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4292*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]]) 4293*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 4294*207e5cccSFangrui Song // 4295*207e5cccSFangrui Song uint32x4_t 
test_vmull_n_u16(uint16x4_t a, uint16_t b) { 4296*207e5cccSFangrui Song return vmull_n_u16(a, b); 4297*207e5cccSFangrui Song } 4298*207e5cccSFangrui Song 4299*207e5cccSFangrui Song // CHECK-LABEL: @test_vmull_n_u32( 4300*207e5cccSFangrui Song // CHECK-NEXT: entry: 4301*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4302*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4303*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 4304*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4305*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]]) 4306*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 4307*207e5cccSFangrui Song // 4308*207e5cccSFangrui Song uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { 4309*207e5cccSFangrui Song return vmull_n_u32(a, b); 4310*207e5cccSFangrui Song } 4311*207e5cccSFangrui Song 4312*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_n_s16( 4313*207e5cccSFangrui Song // CHECK-NEXT: entry: 4314*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4315*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4316*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4317*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4318*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 4319*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4320*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = 
call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]]) 4321*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> 4322*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULL_V2_I_I]] 4323*207e5cccSFangrui Song // 4324*207e5cccSFangrui Song int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { 4325*207e5cccSFangrui Song return vqdmull_n_s16(a, b); 4326*207e5cccSFangrui Song } 4327*207e5cccSFangrui Song 4328*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmull_n_s32( 4329*207e5cccSFangrui Song // CHECK-NEXT: entry: 4330*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4331*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4332*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 4333*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4334*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]]) 4335*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> 4336*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMULL_V2_I_I]] 4337*207e5cccSFangrui Song // 4338*207e5cccSFangrui Song int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { 4339*207e5cccSFangrui Song return vqdmull_n_s32(a, b); 4340*207e5cccSFangrui Song } 4341*207e5cccSFangrui Song 4342*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_n_s16( 4343*207e5cccSFangrui Song // CHECK-NEXT: entry: 4344*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4345*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4346*207e5cccSFangrui Song 
// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4347*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4348*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 4349*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4350*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]]) 4351*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 4352*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQDMULH_V2_I]] 4353*207e5cccSFangrui Song // 4354*207e5cccSFangrui Song int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { 4355*207e5cccSFangrui Song return vqdmulh_n_s16(a, b); 4356*207e5cccSFangrui Song } 4357*207e5cccSFangrui Song 4358*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_n_s16( 4359*207e5cccSFangrui Song // CHECK-NEXT: entry: 4360*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 4361*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4362*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4363*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4364*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4 4365*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5 4366*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6 4367*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = 
insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7 4368*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 4369*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 4370*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[VECINIT7_I]]) 4371*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 4372*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_V2_I]] 4373*207e5cccSFangrui Song // 4374*207e5cccSFangrui Song int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { 4375*207e5cccSFangrui Song return vqdmulhq_n_s16(a, b); 4376*207e5cccSFangrui Song } 4377*207e5cccSFangrui Song 4378*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_n_s32( 4379*207e5cccSFangrui Song // CHECK-NEXT: entry: 4380*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4381*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4382*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 4383*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4384*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]]) 4385*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 4386*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQDMULH_V2_I]] 4387*207e5cccSFangrui Song // 4388*207e5cccSFangrui Song int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { 4389*207e5cccSFangrui Song return vqdmulh_n_s32(a, b); 4390*207e5cccSFangrui Song } 4391*207e5cccSFangrui Song 4392*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vqdmulhq_n_s32( 4393*207e5cccSFangrui Song // CHECK-NEXT: entry: 4394*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 4395*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4396*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2 4397*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3 4398*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 4399*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 4400*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[VECINIT3_I]]) 4401*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 4402*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_V2_I]] 4403*207e5cccSFangrui Song // 4404*207e5cccSFangrui Song int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { 4405*207e5cccSFangrui Song return vqdmulhq_n_s32(a, b); 4406*207e5cccSFangrui Song } 4407*207e5cccSFangrui Song 4408*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_n_s16( 4409*207e5cccSFangrui Song // CHECK-NEXT: entry: 4410*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i32 0 4411*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4412*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4413*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4414*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x 
i8> 4415*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4416*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[VECINIT3_I]]) 4417*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 4418*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMULH_V2_I]] 4419*207e5cccSFangrui Song // 4420*207e5cccSFangrui Song int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { 4421*207e5cccSFangrui Song return vqrdmulh_n_s16(a, b); 4422*207e5cccSFangrui Song } 4423*207e5cccSFangrui Song 4424*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_n_s16( 4425*207e5cccSFangrui Song // CHECK-NEXT: entry: 4426*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[B:%.*]], i32 0 4427*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[B]], i32 1 4428*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[B]], i32 2 4429*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[B]], i32 3 4430*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[B]], i32 4 4431*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[B]], i32 5 4432*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[B]], i32 6 4433*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[B]], i32 7 4434*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 4435*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> 4436*207e5cccSFangrui Song // CHECK-NEXT: 
[[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[VECINIT7_I]]) 4437*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 4438*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_V2_I]] 4439*207e5cccSFangrui Song // 4440*207e5cccSFangrui Song int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { 4441*207e5cccSFangrui Song return vqrdmulhq_n_s16(a, b); 4442*207e5cccSFangrui Song } 4443*207e5cccSFangrui Song 4444*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_n_s32( 4445*207e5cccSFangrui Song // CHECK-NEXT: entry: 4446*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0 4447*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[B]], i32 1 4448*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 4449*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4450*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[VECINIT1_I]]) 4451*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 4452*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMULH_V2_I]] 4453*207e5cccSFangrui Song // 4454*207e5cccSFangrui Song int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { 4455*207e5cccSFangrui Song return vqrdmulh_n_s32(a, b); 4456*207e5cccSFangrui Song } 4457*207e5cccSFangrui Song 4458*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_n_s32( 4459*207e5cccSFangrui Song // CHECK-NEXT: entry: 4460*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i32 0 4461*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[B]], 
i32 1 4462*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[B]], i32 2 4463*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[B]], i32 3 4464*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 4465*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> 4466*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[VECINIT3_I]]) 4467*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 4468*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMULHQ_V2_I]] 4469*207e5cccSFangrui Song // 4470*207e5cccSFangrui Song int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { 4471*207e5cccSFangrui Song return vqrdmulhq_n_s32(a, b); 4472*207e5cccSFangrui Song } 4473*207e5cccSFangrui Song 4474*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_n_s16( 4475*207e5cccSFangrui Song // CHECK-NEXT: entry: 4476*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4477*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4478*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4479*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4480*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B:%.*]], [[VECINIT3_I]] 4481*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A:%.*]], [[MUL_I]] 4482*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD_I]] 4483*207e5cccSFangrui Song // 4484*207e5cccSFangrui Song int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { 
4485*207e5cccSFangrui Song return vmla_n_s16(a, b, c); 4486*207e5cccSFangrui Song } 4487*207e5cccSFangrui Song 4488*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_n_s16( 4489*207e5cccSFangrui Song // CHECK-NEXT: entry: 4490*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 4491*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4492*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4493*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4494*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4 4495*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5 4496*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6 4497*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7 4498*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B:%.*]], [[VECINIT7_I]] 4499*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A:%.*]], [[MUL_I]] 4500*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD_I]] 4501*207e5cccSFangrui Song // 4502*207e5cccSFangrui Song int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { 4503*207e5cccSFangrui Song return vmlaq_n_s16(a, b, c); 4504*207e5cccSFangrui Song } 4505*207e5cccSFangrui Song 4506*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_n_s32( 4507*207e5cccSFangrui Song // CHECK-NEXT: entry: 4508*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4509*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 
[[C]], i32 1 4510*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B:%.*]], [[VECINIT1_I]] 4511*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A:%.*]], [[MUL_I]] 4512*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD_I]] 4513*207e5cccSFangrui Song // 4514*207e5cccSFangrui Song int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { 4515*207e5cccSFangrui Song return vmla_n_s32(a, b, c); 4516*207e5cccSFangrui Song } 4517*207e5cccSFangrui Song 4518*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_n_s32( 4519*207e5cccSFangrui Song // CHECK-NEXT: entry: 4520*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 4521*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4522*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2 4523*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3 4524*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[VECINIT3_I]] 4525*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[MUL_I]] 4526*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 4527*207e5cccSFangrui Song // 4528*207e5cccSFangrui Song int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { 4529*207e5cccSFangrui Song return vmlaq_n_s32(a, b, c); 4530*207e5cccSFangrui Song } 4531*207e5cccSFangrui Song 4532*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_n_u16( 4533*207e5cccSFangrui Song // CHECK-NEXT: entry: 4534*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4535*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4536*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = 
insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4537*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4538*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B:%.*]], [[VECINIT3_I]] 4539*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[A:%.*]], [[MUL_I]] 4540*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD_I]] 4541*207e5cccSFangrui Song // 4542*207e5cccSFangrui Song uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { 4543*207e5cccSFangrui Song return vmla_n_u16(a, b, c); 4544*207e5cccSFangrui Song } 4545*207e5cccSFangrui Song 4546*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_n_u16( 4547*207e5cccSFangrui Song // CHECK-NEXT: entry: 4548*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 4549*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4550*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4551*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4552*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4 4553*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5 4554*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6 4555*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7 4556*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B:%.*]], [[VECINIT7_I]] 4557*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[A:%.*]], [[MUL_I]] 4558*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD_I]] 
4559*207e5cccSFangrui Song // 4560*207e5cccSFangrui Song uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { 4561*207e5cccSFangrui Song return vmlaq_n_u16(a, b, c); 4562*207e5cccSFangrui Song } 4563*207e5cccSFangrui Song 4564*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_n_u32( 4565*207e5cccSFangrui Song // CHECK-NEXT: entry: 4566*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4567*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4568*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B:%.*]], [[VECINIT1_I]] 4569*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[A:%.*]], [[MUL_I]] 4570*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD_I]] 4571*207e5cccSFangrui Song // 4572*207e5cccSFangrui Song uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { 4573*207e5cccSFangrui Song return vmla_n_u32(a, b, c); 4574*207e5cccSFangrui Song } 4575*207e5cccSFangrui Song 4576*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_n_u32( 4577*207e5cccSFangrui Song // CHECK-NEXT: entry: 4578*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 4579*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4580*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2 4581*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3 4582*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[VECINIT3_I]] 4583*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[MUL_I]] 4584*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 4585*207e5cccSFangrui Song // 4586*207e5cccSFangrui Song uint32x4_t 
test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { 4587*207e5cccSFangrui Song return vmlaq_n_u32(a, b, c); 4588*207e5cccSFangrui Song } 4589*207e5cccSFangrui Song 4590*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_n_s16( 4591*207e5cccSFangrui Song // CHECK-NEXT: entry: 4592*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4593*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4594*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4595*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4596*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4597*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4598*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4599*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 4600*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 4601*207e5cccSFangrui Song // 4602*207e5cccSFangrui Song int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4603*207e5cccSFangrui Song return vmlal_n_s16(a, b, c); 4604*207e5cccSFangrui Song } 4605*207e5cccSFangrui Song 4606*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_n_s32( 4607*207e5cccSFangrui Song // CHECK-NEXT: entry: 4608*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4609*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4610*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4611*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4612*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4613*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 4614*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD_I]] 4615*207e5cccSFangrui Song // 4616*207e5cccSFangrui Song int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4617*207e5cccSFangrui Song return vmlal_n_s32(a, b, c); 4618*207e5cccSFangrui Song } 4619*207e5cccSFangrui Song 4620*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlal_n_u16( 4621*207e5cccSFangrui Song // CHECK-NEXT: entry: 4622*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4623*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4624*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4625*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4626*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4627*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4628*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4629*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 4630*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD_I]] 4631*207e5cccSFangrui Song // 4632*207e5cccSFangrui Song uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { 4633*207e5cccSFangrui Song return vmlal_n_u16(a, b, c); 4634*207e5cccSFangrui Song } 4635*207e5cccSFangrui Song 4636*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmlal_n_u32( 4637*207e5cccSFangrui Song // CHECK-NEXT: entry: 4638*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4639*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4640*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4641*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4642*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4643*207e5cccSFangrui Song // CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 4644*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[ADD_I]] 4645*207e5cccSFangrui Song // 4646*207e5cccSFangrui Song uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { 4647*207e5cccSFangrui Song return vmlal_n_u32(a, b, c); 4648*207e5cccSFangrui Song } 4649*207e5cccSFangrui Song 4650*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_n_s16( 4651*207e5cccSFangrui Song // CHECK-NEXT: entry: 4652*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4653*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4654*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4655*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4656*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 4657*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4658*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4659*207e5cccSFangrui Song // CHECK-NEXT: 
[[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4660*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]]) 4661*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I_I]] 4662*207e5cccSFangrui Song // 4663*207e5cccSFangrui Song int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4664*207e5cccSFangrui Song return vqdmlal_n_s16(a, b, c); 4665*207e5cccSFangrui Song } 4666*207e5cccSFangrui Song 4667*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_n_s32( 4668*207e5cccSFangrui Song // CHECK-NEXT: entry: 4669*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4670*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4671*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 4672*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4673*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4674*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4675*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]]) 4676*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I_I]] 4677*207e5cccSFangrui Song // 4678*207e5cccSFangrui Song int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4679*207e5cccSFangrui Song return vqdmlal_n_s32(a, b, c); 4680*207e5cccSFangrui Song } 4681*207e5cccSFangrui Song 4682*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_n_s16( 4683*207e5cccSFangrui Song // CHECK-NEXT: entry: 4684*207e5cccSFangrui Song // 
CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4685*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4686*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4687*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4688*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B:%.*]], [[VECINIT3_I]] 4689*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL_I]] 4690*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB_I]] 4691*207e5cccSFangrui Song // 4692*207e5cccSFangrui Song int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { 4693*207e5cccSFangrui Song return vmls_n_s16(a, b, c); 4694*207e5cccSFangrui Song } 4695*207e5cccSFangrui Song 4696*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_n_s16( 4697*207e5cccSFangrui Song // CHECK-NEXT: entry: 4698*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 4699*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4700*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4701*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4702*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4 4703*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5 4704*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6 4705*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7 
4706*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B:%.*]], [[VECINIT7_I]] 4707*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL_I]] 4708*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB_I]] 4709*207e5cccSFangrui Song // 4710*207e5cccSFangrui Song int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { 4711*207e5cccSFangrui Song return vmlsq_n_s16(a, b, c); 4712*207e5cccSFangrui Song } 4713*207e5cccSFangrui Song 4714*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_n_s32( 4715*207e5cccSFangrui Song // CHECK-NEXT: entry: 4716*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4717*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4718*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B:%.*]], [[VECINIT1_I]] 4719*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL_I]] 4720*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB_I]] 4721*207e5cccSFangrui Song // 4722*207e5cccSFangrui Song int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { 4723*207e5cccSFangrui Song return vmls_n_s32(a, b, c); 4724*207e5cccSFangrui Song } 4725*207e5cccSFangrui Song 4726*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_n_s32( 4727*207e5cccSFangrui Song // CHECK-NEXT: entry: 4728*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 4729*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4730*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2 4731*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3 4732*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], 
[[VECINIT3_I]] 4733*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL_I]] 4734*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 4735*207e5cccSFangrui Song // 4736*207e5cccSFangrui Song int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { 4737*207e5cccSFangrui Song return vmlsq_n_s32(a, b, c); 4738*207e5cccSFangrui Song } 4739*207e5cccSFangrui Song 4740*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_n_u16( 4741*207e5cccSFangrui Song // CHECK-NEXT: entry: 4742*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4743*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4744*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4745*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4746*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i16> [[B:%.*]], [[VECINIT3_I]] 4747*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL_I]] 4748*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB_I]] 4749*207e5cccSFangrui Song // 4750*207e5cccSFangrui Song uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { 4751*207e5cccSFangrui Song return vmls_n_u16(a, b, c); 4752*207e5cccSFangrui Song } 4753*207e5cccSFangrui Song 4754*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_n_u16( 4755*207e5cccSFangrui Song // CHECK-NEXT: entry: 4756*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C:%.*]], i32 0 4757*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4758*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4759*207e5cccSFangrui Song // CHECK-NEXT: 
[[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4760*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C]], i32 4 4761*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C]], i32 5 4762*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C]], i32 6 4763*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C]], i32 7 4764*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <8 x i16> [[B:%.*]], [[VECINIT7_I]] 4765*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL_I]] 4766*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB_I]] 4767*207e5cccSFangrui Song // 4768*207e5cccSFangrui Song uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { 4769*207e5cccSFangrui Song return vmlsq_n_u16(a, b, c); 4770*207e5cccSFangrui Song } 4771*207e5cccSFangrui Song 4772*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_n_u32( 4773*207e5cccSFangrui Song // CHECK-NEXT: entry: 4774*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4775*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4776*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <2 x i32> [[B:%.*]], [[VECINIT1_I]] 4777*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL_I]] 4778*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB_I]] 4779*207e5cccSFangrui Song // 4780*207e5cccSFangrui Song uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { 4781*207e5cccSFangrui Song return vmls_n_u32(a, b, c); 4782*207e5cccSFangrui Song } 4783*207e5cccSFangrui Song 4784*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_n_u32( 4785*207e5cccSFangrui Song // CHECK-NEXT: 
entry: 4786*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0 4787*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4788*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C]], i32 2 4789*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C]], i32 3 4790*207e5cccSFangrui Song // CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[VECINIT3_I]] 4791*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL_I]] 4792*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 4793*207e5cccSFangrui Song // 4794*207e5cccSFangrui Song uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { 4795*207e5cccSFangrui Song return vmlsq_n_u32(a, b, c); 4796*207e5cccSFangrui Song } 4797*207e5cccSFangrui Song 4798*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_n_s16( 4799*207e5cccSFangrui Song // CHECK-NEXT: entry: 4800*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4801*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4802*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4803*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4804*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4805*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4806*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4807*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], 
[[VMULL2_I_I]] 4808*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 4809*207e5cccSFangrui Song // 4810*207e5cccSFangrui Song int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4811*207e5cccSFangrui Song return vmlsl_n_s16(a, b, c); 4812*207e5cccSFangrui Song } 4813*207e5cccSFangrui Song 4814*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_n_s32( 4815*207e5cccSFangrui Song // CHECK-NEXT: entry: 4816*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4817*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4818*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4819*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4820*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4821*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 4822*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB_I]] 4823*207e5cccSFangrui Song // 4824*207e5cccSFangrui Song int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4825*207e5cccSFangrui Song return vmlsl_n_s32(a, b, c); 4826*207e5cccSFangrui Song } 4827*207e5cccSFangrui Song 4828*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_n_u16( 4829*207e5cccSFangrui Song // CHECK-NEXT: entry: 4830*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4831*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4832*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4833*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 
4834*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4835*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4836*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4837*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I_I]] 4838*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB_I]] 4839*207e5cccSFangrui Song // 4840*207e5cccSFangrui Song uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { 4841*207e5cccSFangrui Song return vmlsl_n_u16(a, b, c); 4842*207e5cccSFangrui Song } 4843*207e5cccSFangrui Song 4844*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsl_n_u32( 4845*207e5cccSFangrui Song // CHECK-NEXT: entry: 4846*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4847*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4848*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4849*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> 4850*207e5cccSFangrui Song // CHECK-NEXT: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4851*207e5cccSFangrui Song // CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I_I]] 4852*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[SUB_I]] 4853*207e5cccSFangrui Song // 4854*207e5cccSFangrui Song uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { 4855*207e5cccSFangrui Song return vmlsl_n_u32(a, b, c); 4856*207e5cccSFangrui Song } 4857*207e5cccSFangrui Song 4858*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_n_s16( 4859*207e5cccSFangrui Song // CHECK-NEXT: entry: 4860*207e5cccSFangrui Song // 
CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i32 0 4861*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[C]], i32 1 4862*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[C]], i32 2 4863*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[C]], i32 3 4864*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 4865*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 4866*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> 4867*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[VECINIT3_I]]) 4868*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I_I]]) 4869*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I_I]] 4870*207e5cccSFangrui Song // 4871*207e5cccSFangrui Song int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { 4872*207e5cccSFangrui Song return vqdmlsl_n_s16(a, b, c); 4873*207e5cccSFangrui Song } 4874*207e5cccSFangrui Song 4875*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_n_s32( 4876*207e5cccSFangrui Song // CHECK-NEXT: entry: 4877*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[C:%.*]], i32 0 4878*207e5cccSFangrui Song // CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[C]], i32 1 4879*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 4880*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 4881*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> 
[[VECINIT1_I]] to <8 x i8> 4882*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[VECINIT1_I]]) 4883*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I_I]]) 4884*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I_I]] 4885*207e5cccSFangrui Song // 4886*207e5cccSFangrui Song int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { 4887*207e5cccSFangrui Song return vqdmlsl_n_s32(a, b, c); 4888*207e5cccSFangrui Song } 4889*207e5cccSFangrui Song 4890*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_lane_u16_0( 4891*207e5cccSFangrui Song // CHECK-NEXT: entry: 4892*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 4893*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4894*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 4895*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 4896*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 4897*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 4898*207e5cccSFangrui Song // 4899*207e5cccSFangrui Song uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 4900*207e5cccSFangrui Song return vmla_lane_u16(a, b, v, 0); 4901*207e5cccSFangrui Song } 4902*207e5cccSFangrui Song 4903*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_u16_0( 4904*207e5cccSFangrui Song // CHECK-NEXT: entry: 4905*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 4906*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4907*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x 
i16> [[TMP1]], <8 x i32> zeroinitializer 4908*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 4909*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 4910*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 4911*207e5cccSFangrui Song // 4912*207e5cccSFangrui Song uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 4913*207e5cccSFangrui Song return vmlaq_lane_u16(a, b, v, 0); 4914*207e5cccSFangrui Song } 4915*207e5cccSFangrui Song 4916*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_lane_u32_0( 4917*207e5cccSFangrui Song // CHECK-NEXT: entry: 4918*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 4919*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4920*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 4921*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 4922*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 4923*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 4924*207e5cccSFangrui Song // 4925*207e5cccSFangrui Song uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 4926*207e5cccSFangrui Song return vmla_lane_u32(a, b, v, 0); 4927*207e5cccSFangrui Song } 4928*207e5cccSFangrui Song 4929*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_u32_0( 4930*207e5cccSFangrui Song // CHECK-NEXT: entry: 4931*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 4932*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4933*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 4934*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> 
[[B:%.*]], [[LANE]] 4935*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 4936*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 4937*207e5cccSFangrui Song // 4938*207e5cccSFangrui Song uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 4939*207e5cccSFangrui Song return vmlaq_lane_u32(a, b, v, 0); 4940*207e5cccSFangrui Song } 4941*207e5cccSFangrui Song 4942*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_laneq_u16_0( 4943*207e5cccSFangrui Song // CHECK-NEXT: entry: 4944*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 4945*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4946*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 4947*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 4948*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 4949*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 4950*207e5cccSFangrui Song // 4951*207e5cccSFangrui Song uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 4952*207e5cccSFangrui Song return vmla_laneq_u16(a, b, v, 0); 4953*207e5cccSFangrui Song } 4954*207e5cccSFangrui Song 4955*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_u16_0( 4956*207e5cccSFangrui Song // CHECK-NEXT: entry: 4957*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 4958*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4959*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 4960*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 4961*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 
4962*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 4963*207e5cccSFangrui Song // 4964*207e5cccSFangrui Song uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 4965*207e5cccSFangrui Song return vmlaq_laneq_u16(a, b, v, 0); 4966*207e5cccSFangrui Song } 4967*207e5cccSFangrui Song 4968*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_laneq_u32_0( 4969*207e5cccSFangrui Song // CHECK-NEXT: entry: 4970*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 4971*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4972*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 4973*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 4974*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 4975*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 4976*207e5cccSFangrui Song // 4977*207e5cccSFangrui Song uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 4978*207e5cccSFangrui Song return vmla_laneq_u32(a, b, v, 0); 4979*207e5cccSFangrui Song } 4980*207e5cccSFangrui Song 4981*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_u32_0( 4982*207e5cccSFangrui Song // CHECK-NEXT: entry: 4983*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 4984*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4985*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 4986*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 4987*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 4988*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 4989*207e5cccSFangrui Song // 
4990*207e5cccSFangrui Song uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 4991*207e5cccSFangrui Song return vmlaq_laneq_u32(a, b, v, 0); 4992*207e5cccSFangrui Song } 4993*207e5cccSFangrui Song 4994*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_laneq_s16_0( 4995*207e5cccSFangrui Song // CHECK-NEXT: entry: 4996*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 4997*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4998*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 4999*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5000*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 5001*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5002*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 5003*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5004*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 5005*207e5cccSFangrui Song // 5006*207e5cccSFangrui Song int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 5007*207e5cccSFangrui Song return vqdmlal_laneq_s16(a, b, v, 0); 5008*207e5cccSFangrui Song } 5009*207e5cccSFangrui Song 5010*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_laneq_s32_0( 5011*207e5cccSFangrui Song // CHECK-NEXT: entry: 5012*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5013*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5014*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], 
<4 x i32> [[TMP1]], <2 x i32> zeroinitializer 5015*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5016*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 5017*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5018*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 5019*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5020*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 5021*207e5cccSFangrui Song // 5022*207e5cccSFangrui Song int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 5023*207e5cccSFangrui Song return vqdmlal_laneq_s32(a, b, v, 0); 5024*207e5cccSFangrui Song } 5025*207e5cccSFangrui Song 5026*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_laneq_s16_0( 5027*207e5cccSFangrui Song // CHECK-NEXT: entry: 5028*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5029*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5030*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5031*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 5032*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5033*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 5034*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5035*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> 
[[SHUFFLE_I]], <4 x i16> [[LANE]]) 5036*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5037*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 5038*207e5cccSFangrui Song // 5039*207e5cccSFangrui Song int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 5040*207e5cccSFangrui Song return vqdmlal_high_laneq_s16(a, b, v, 0); 5041*207e5cccSFangrui Song } 5042*207e5cccSFangrui Song 5043*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_laneq_s32_0( 5044*207e5cccSFangrui Song // CHECK-NEXT: entry: 5045*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 5046*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5047*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5048*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 5049*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5050*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 5051*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5052*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 5053*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5054*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 5055*207e5cccSFangrui Song // 5056*207e5cccSFangrui Song int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 5057*207e5cccSFangrui Song return vqdmlal_high_laneq_s32(a, b, 
v, 0); 5058*207e5cccSFangrui Song } 5059*207e5cccSFangrui Song 5060*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_lane_u16_0( 5061*207e5cccSFangrui Song // CHECK-NEXT: entry: 5062*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5063*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5064*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> zeroinitializer 5065*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 5066*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 5067*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 5068*207e5cccSFangrui Song // 5069*207e5cccSFangrui Song uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 5070*207e5cccSFangrui Song return vmls_lane_u16(a, b, v, 0); 5071*207e5cccSFangrui Song } 5072*207e5cccSFangrui Song 5073*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_u16_0( 5074*207e5cccSFangrui Song // CHECK-NEXT: entry: 5075*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5076*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5077*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> zeroinitializer 5078*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5079*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 5080*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 5081*207e5cccSFangrui Song // 5082*207e5cccSFangrui Song uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 5083*207e5cccSFangrui Song return vmlsq_lane_u16(a, b, v, 0); 5084*207e5cccSFangrui Song } 5085*207e5cccSFangrui Song 5086*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmls_lane_u32_0( 5087*207e5cccSFangrui Song // CHECK-NEXT: entry: 5088*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5089*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5090*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer 5091*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5092*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]] 5093*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 5094*207e5cccSFangrui Song // 5095*207e5cccSFangrui Song uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 5096*207e5cccSFangrui Song return vmls_lane_u32(a, b, v, 0); 5097*207e5cccSFangrui Song } 5098*207e5cccSFangrui Song 5099*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_u32_0( 5100*207e5cccSFangrui Song // CHECK-NEXT: entry: 5101*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5102*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5103*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> zeroinitializer 5104*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5105*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 5106*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 5107*207e5cccSFangrui Song // 5108*207e5cccSFangrui Song uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 5109*207e5cccSFangrui Song return vmlsq_lane_u32(a, b, v, 0); 5110*207e5cccSFangrui Song } 5111*207e5cccSFangrui Song 5112*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_u16_0( 5113*207e5cccSFangrui Song // CHECK-NEXT: entry: 5114*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5115*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5116*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 5117*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 5118*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 5119*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 5120*207e5cccSFangrui Song // 5121*207e5cccSFangrui Song uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 5122*207e5cccSFangrui Song return vmls_laneq_u16(a, b, v, 0); 5123*207e5cccSFangrui Song } 5124*207e5cccSFangrui Song 5125*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_u16_0( 5126*207e5cccSFangrui Song // CHECK-NEXT: entry: 5127*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5128*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5129*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> zeroinitializer 5130*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5131*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 5132*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 5133*207e5cccSFangrui Song // 5134*207e5cccSFangrui Song uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 5135*207e5cccSFangrui Song return vmlsq_laneq_u16(a, b, v, 0); 5136*207e5cccSFangrui Song } 5137*207e5cccSFangrui Song 5138*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_u32_0( 5139*207e5cccSFangrui Song // CHECK-NEXT: entry: 5140*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5141*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] 
= bitcast <16 x i8> [[TMP0]] to <4 x i32> 5142*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 5143*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5144*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]] 5145*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 5146*207e5cccSFangrui Song // 5147*207e5cccSFangrui Song uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 5148*207e5cccSFangrui Song return vmls_laneq_u32(a, b, v, 0); 5149*207e5cccSFangrui Song } 5150*207e5cccSFangrui Song 5151*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_u32_0( 5152*207e5cccSFangrui Song // CHECK-NEXT: entry: 5153*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5154*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5155*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer 5156*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5157*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 5158*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 5159*207e5cccSFangrui Song // 5160*207e5cccSFangrui Song uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 5161*207e5cccSFangrui Song return vmlsq_laneq_u32(a, b, v, 0); 5162*207e5cccSFangrui Song } 5163*207e5cccSFangrui Song 5164*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_laneq_s16_0( 5165*207e5cccSFangrui Song // CHECK-NEXT: entry: 5166*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5167*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5168*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = 
shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 5169*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5170*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 5171*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5172*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 5173*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5174*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 5175*207e5cccSFangrui Song // 5176*207e5cccSFangrui Song int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { 5177*207e5cccSFangrui Song return vqdmlsl_laneq_s16(a, b, v, 0); 5178*207e5cccSFangrui Song } 5179*207e5cccSFangrui Song 5180*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_laneq_s32_0( 5181*207e5cccSFangrui Song // CHECK-NEXT: entry: 5182*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5183*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5184*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 5185*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5186*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 5187*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5188*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 5189*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> 
@llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5190*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 5191*207e5cccSFangrui Song // 5192*207e5cccSFangrui Song int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { 5193*207e5cccSFangrui Song return vqdmlsl_laneq_s32(a, b, v, 0); 5194*207e5cccSFangrui Song } 5195*207e5cccSFangrui Song 5196*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_laneq_s16_0( 5197*207e5cccSFangrui Song // CHECK-NEXT: entry: 5198*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5199*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5200*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5201*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> zeroinitializer 5202*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5203*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 5204*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5205*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 5206*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5207*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 5208*207e5cccSFangrui Song // 5209*207e5cccSFangrui Song int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { 5210*207e5cccSFangrui Song return vqdmlsl_high_laneq_s16(a, b, v, 0); 5211*207e5cccSFangrui Song } 5212*207e5cccSFangrui Song 5213*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vqdmlsl_high_laneq_s32_0( 5214*207e5cccSFangrui Song // CHECK-NEXT: entry: 5215*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 5216*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5217*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5218*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> zeroinitializer 5219*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5220*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 5221*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5222*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 5223*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5224*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 5225*207e5cccSFangrui Song // 5226*207e5cccSFangrui Song int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { 5227*207e5cccSFangrui Song return vqdmlsl_high_laneq_s32(a, b, v, 0); 5228*207e5cccSFangrui Song } 5229*207e5cccSFangrui Song 5230*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_laneq_s16_0( 5231*207e5cccSFangrui Song // CHECK-NEXT: entry: 5232*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 5233*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5234*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5235*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V1:%.*]] = bitcast <16 x 
i8> [[TMP1]] to <8 x i16> 5236*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> [[VQDMULH_LANEQ_V]], <8 x i16> [[VQDMULH_LANEQ_V1]], i32 0) 5237*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQDMULH_LANEQ_V2]] 5238*207e5cccSFangrui Song // 5239*207e5cccSFangrui Song int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { 5240*207e5cccSFangrui Song return vqdmulh_laneq_s16(a, v, 0); 5241*207e5cccSFangrui Song } 5242*207e5cccSFangrui Song 5243*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_laneq_s16_0( 5244*207e5cccSFangrui Song // CHECK-NEXT: entry: 5245*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 5246*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5247*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5248*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5249*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> [[VQDMULHQ_LANEQ_V]], <8 x i16> [[VQDMULHQ_LANEQ_V1]], i32 0) 5250*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_LANEQ_V2]] 5251*207e5cccSFangrui Song // 5252*207e5cccSFangrui Song int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { 5253*207e5cccSFangrui Song return vqdmulhq_laneq_s16(a, v, 0); 5254*207e5cccSFangrui Song } 5255*207e5cccSFangrui Song 5256*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_laneq_s32_0( 5257*207e5cccSFangrui Song // CHECK-NEXT: entry: 5258*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 5259*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5260*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> 
[[TMP0]] to <2 x i32> 5261*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5262*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> [[VQDMULH_LANEQ_V]], <4 x i32> [[VQDMULH_LANEQ_V1]], i32 0) 5263*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQDMULH_LANEQ_V2]] 5264*207e5cccSFangrui Song // 5265*207e5cccSFangrui Song int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { 5266*207e5cccSFangrui Song return vqdmulh_laneq_s32(a, v, 0); 5267*207e5cccSFangrui Song } 5268*207e5cccSFangrui Song 5269*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_laneq_s32_0( 5270*207e5cccSFangrui Song // CHECK-NEXT: entry: 5271*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5272*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5273*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5274*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5275*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> [[VQDMULHQ_LANEQ_V]], <4 x i32> [[VQDMULHQ_LANEQ_V1]], i32 0) 5276*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_LANEQ_V2]] 5277*207e5cccSFangrui Song // 5278*207e5cccSFangrui Song int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { 5279*207e5cccSFangrui Song return vqdmulhq_laneq_s32(a, v, 0); 5280*207e5cccSFangrui Song } 5281*207e5cccSFangrui Song 5282*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_laneq_s16_0( 5283*207e5cccSFangrui Song // CHECK-NEXT: entry: 5284*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 5285*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> 
[[V:%.*]] to <16 x i8> 5286*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5287*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5288*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> [[VQRDMULH_LANEQ_V]], <8 x i16> [[VQRDMULH_LANEQ_V1]], i32 0) 5289*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQRDMULH_LANEQ_V2]] 5290*207e5cccSFangrui Song // 5291*207e5cccSFangrui Song int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { 5292*207e5cccSFangrui Song return vqrdmulh_laneq_s16(a, v, 0); 5293*207e5cccSFangrui Song } 5294*207e5cccSFangrui Song 5295*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_laneq_s16_0( 5296*207e5cccSFangrui Song // CHECK-NEXT: entry: 5297*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 5298*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5299*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5300*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5301*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> [[VQRDMULHQ_LANEQ_V]], <8 x i16> [[VQRDMULHQ_LANEQ_V1]], i32 0) 5302*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_LANEQ_V2]] 5303*207e5cccSFangrui Song // 5304*207e5cccSFangrui Song int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { 5305*207e5cccSFangrui Song return vqrdmulhq_laneq_s16(a, v, 0); 5306*207e5cccSFangrui Song } 5307*207e5cccSFangrui Song 5308*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_laneq_s32_0( 5309*207e5cccSFangrui Song // CHECK-NEXT: entry: 5310*207e5cccSFangrui Song // CHECK-NEXT: 
[[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 5311*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5312*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5313*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5314*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x i32> [[VQRDMULH_LANEQ_V]], <4 x i32> [[VQRDMULH_LANEQ_V1]], i32 0) 5315*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMULH_LANEQ_V2]] 5316*207e5cccSFangrui Song // 5317*207e5cccSFangrui Song int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { 5318*207e5cccSFangrui Song return vqrdmulh_laneq_s32(a, v, 0); 5319*207e5cccSFangrui Song } 5320*207e5cccSFangrui Song 5321*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_laneq_s32_0( 5322*207e5cccSFangrui Song // CHECK-NEXT: entry: 5323*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5324*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5325*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5326*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5327*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> [[VQRDMULHQ_LANEQ_V]], <4 x i32> [[VQRDMULHQ_LANEQ_V1]], i32 0) 5328*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMULHQ_LANEQ_V2]] 5329*207e5cccSFangrui Song // 5330*207e5cccSFangrui Song int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { 5331*207e5cccSFangrui Song return vqrdmulhq_laneq_s32(a, v, 0); 5332*207e5cccSFangrui Song } 5333*207e5cccSFangrui Song 5334*207e5cccSFangrui Song // CHECK-LABEL: 
@test_vmla_lane_u16( 5335*207e5cccSFangrui Song // CHECK-NEXT: entry: 5336*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5337*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5338*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5339*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 5340*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 5341*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 5342*207e5cccSFangrui Song // 5343*207e5cccSFangrui Song uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 5344*207e5cccSFangrui Song return vmla_lane_u16(a, b, v, 3); 5345*207e5cccSFangrui Song } 5346*207e5cccSFangrui Song 5347*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_u16( 5348*207e5cccSFangrui Song // CHECK-NEXT: entry: 5349*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5350*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5351*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 5352*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5353*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 5354*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 5355*207e5cccSFangrui Song // 5356*207e5cccSFangrui Song uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 5357*207e5cccSFangrui Song return vmlaq_lane_u16(a, b, v, 3); 5358*207e5cccSFangrui Song } 5359*207e5cccSFangrui Song 5360*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_lane_u32( 5361*207e5cccSFangrui Song // CHECK-NEXT: entry: 
5362*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5363*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5364*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 5365*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5366*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 5367*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 5368*207e5cccSFangrui Song // 5369*207e5cccSFangrui Song uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 5370*207e5cccSFangrui Song return vmla_lane_u32(a, b, v, 1); 5371*207e5cccSFangrui Song } 5372*207e5cccSFangrui Song 5373*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_lane_u32( 5374*207e5cccSFangrui Song // CHECK-NEXT: entry: 5375*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5376*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5377*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 5378*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5379*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 5380*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 5381*207e5cccSFangrui Song // 5382*207e5cccSFangrui Song uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 5383*207e5cccSFangrui Song return vmlaq_lane_u32(a, b, v, 1); 5384*207e5cccSFangrui Song } 5385*207e5cccSFangrui Song 5386*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_laneq_u16( 5387*207e5cccSFangrui Song // CHECK-NEXT: entry: 5388*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 
5389*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5390*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5391*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 5392*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]] 5393*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[ADD]] 5394*207e5cccSFangrui Song // 5395*207e5cccSFangrui Song uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 5396*207e5cccSFangrui Song return vmla_laneq_u16(a, b, v, 7); 5397*207e5cccSFangrui Song } 5398*207e5cccSFangrui Song 5399*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_u16( 5400*207e5cccSFangrui Song // CHECK-NEXT: entry: 5401*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5402*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5403*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 5404*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5405*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]] 5406*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[ADD]] 5407*207e5cccSFangrui Song // 5408*207e5cccSFangrui Song uint16x8_t test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 5409*207e5cccSFangrui Song return vmlaq_laneq_u16(a, b, v, 7); 5410*207e5cccSFangrui Song } 5411*207e5cccSFangrui Song 5412*207e5cccSFangrui Song // CHECK-LABEL: @test_vmla_laneq_u32( 5413*207e5cccSFangrui Song // CHECK-NEXT: entry: 5414*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5415*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <4 x i32> 5416*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5417*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5418*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]] 5419*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[ADD]] 5420*207e5cccSFangrui Song // 5421*207e5cccSFangrui Song uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 5422*207e5cccSFangrui Song return vmla_laneq_u32(a, b, v, 3); 5423*207e5cccSFangrui Song } 5424*207e5cccSFangrui Song 5425*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlaq_laneq_u32( 5426*207e5cccSFangrui Song // CHECK-NEXT: entry: 5427*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5428*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5429*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5430*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5431*207e5cccSFangrui Song // CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]] 5432*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[ADD]] 5433*207e5cccSFangrui Song // 5434*207e5cccSFangrui Song uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 5435*207e5cccSFangrui Song return vmlaq_laneq_u32(a, b, v, 3); 5436*207e5cccSFangrui Song } 5437*207e5cccSFangrui Song 5438*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_laneq_s16( 5439*207e5cccSFangrui Song // CHECK-NEXT: entry: 5440*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5441*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5442*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector 
<8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5443*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5444*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 5445*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5446*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 5447*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5448*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 5449*207e5cccSFangrui Song // 5450*207e5cccSFangrui Song int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 5451*207e5cccSFangrui Song return vqdmlal_laneq_s16(a, b, v, 7); 5452*207e5cccSFangrui Song } 5453*207e5cccSFangrui Song 5454*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_laneq_s32( 5455*207e5cccSFangrui Song // CHECK-NEXT: entry: 5456*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5457*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5458*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5459*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5460*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 5461*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5462*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 5463*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x 
i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5464*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 5465*207e5cccSFangrui Song // 5466*207e5cccSFangrui Song int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 5467*207e5cccSFangrui Song return vqdmlal_laneq_s32(a, b, v, 3); 5468*207e5cccSFangrui Song } 5469*207e5cccSFangrui Song 5470*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_laneq_s16( 5471*207e5cccSFangrui Song // CHECK-NEXT: entry: 5472*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5473*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5474*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5475*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5476*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5477*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 5478*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5479*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 5480*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5481*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLAL_V3_I]] 5482*207e5cccSFangrui Song // 5483*207e5cccSFangrui Song int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 5484*207e5cccSFangrui Song return vqdmlal_high_laneq_s16(a, b, v, 7); 5485*207e5cccSFangrui Song } 5486*207e5cccSFangrui Song 5487*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlal_high_laneq_s32( 
5488*207e5cccSFangrui Song // CHECK-NEXT: entry: 5489*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 5490*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5491*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5492*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5493*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5494*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 5495*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5496*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 5497*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5498*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLAL_V3_I]] 5499*207e5cccSFangrui Song // 5500*207e5cccSFangrui Song int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 5501*207e5cccSFangrui Song return vqdmlal_high_laneq_s32(a, b, v, 3); 5502*207e5cccSFangrui Song } 5503*207e5cccSFangrui Song 5504*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_lane_u16( 5505*207e5cccSFangrui Song // CHECK-NEXT: entry: 5506*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5507*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5508*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5509*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], 
[[LANE]] 5510*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 5511*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 5512*207e5cccSFangrui Song // 5513*207e5cccSFangrui Song uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { 5514*207e5cccSFangrui Song return vmls_lane_u16(a, b, v, 3); 5515*207e5cccSFangrui Song } 5516*207e5cccSFangrui Song 5517*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_u16( 5518*207e5cccSFangrui Song // CHECK-NEXT: entry: 5519*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[V:%.*]] to <8 x i8> 5520*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5521*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 5522*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5523*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 5524*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 5525*207e5cccSFangrui Song // 5526*207e5cccSFangrui Song uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { 5527*207e5cccSFangrui Song return vmlsq_lane_u16(a, b, v, 3); 5528*207e5cccSFangrui Song } 5529*207e5cccSFangrui Song 5530*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_lane_u32( 5531*207e5cccSFangrui Song // CHECK-NEXT: entry: 5532*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5533*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5534*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1> 5535*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5536*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> 
[[A:%.*]], [[MUL]] 5537*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 5538*207e5cccSFangrui Song // 5539*207e5cccSFangrui Song uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { 5540*207e5cccSFangrui Song return vmls_lane_u32(a, b, v, 1); 5541*207e5cccSFangrui Song } 5542*207e5cccSFangrui Song 5543*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_lane_u32( 5544*207e5cccSFangrui Song // CHECK-NEXT: entry: 5545*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> 5546*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5547*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 5548*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5549*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 5550*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 5551*207e5cccSFangrui Song // 5552*207e5cccSFangrui Song uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { 5553*207e5cccSFangrui Song return vmlsq_lane_u32(a, b, v, 1); 5554*207e5cccSFangrui Song } 5555*207e5cccSFangrui Song 5556*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_u16( 5557*207e5cccSFangrui Song // CHECK-NEXT: entry: 5558*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5559*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5560*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5561*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]] 5562*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]] 5563*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[SUB]] 
5564*207e5cccSFangrui Song // 5565*207e5cccSFangrui Song uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { 5566*207e5cccSFangrui Song return vmls_laneq_u16(a, b, v, 7); 5567*207e5cccSFangrui Song } 5568*207e5cccSFangrui Song 5569*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_u16( 5570*207e5cccSFangrui Song // CHECK-NEXT: entry: 5571*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5572*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5573*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 5574*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]] 5575*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]] 5576*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[SUB]] 5577*207e5cccSFangrui Song // 5578*207e5cccSFangrui Song uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { 5579*207e5cccSFangrui Song return vmlsq_laneq_u16(a, b, v, 7); 5580*207e5cccSFangrui Song } 5581*207e5cccSFangrui Song 5582*207e5cccSFangrui Song // CHECK-LABEL: @test_vmls_laneq_u32( 5583*207e5cccSFangrui Song // CHECK-NEXT: entry: 5584*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5585*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5586*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5587*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]] 5588*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]] 5589*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[SUB]] 5590*207e5cccSFangrui Song // 5591*207e5cccSFangrui Song uint32x2_t 
test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { 5592*207e5cccSFangrui Song return vmls_laneq_u32(a, b, v, 3); 5593*207e5cccSFangrui Song } 5594*207e5cccSFangrui Song 5595*207e5cccSFangrui Song // CHECK-LABEL: @test_vmlsq_laneq_u32( 5596*207e5cccSFangrui Song // CHECK-NEXT: entry: 5597*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5598*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5599*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 5600*207e5cccSFangrui Song // CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]] 5601*207e5cccSFangrui Song // CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]] 5602*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[SUB]] 5603*207e5cccSFangrui Song // 5604*207e5cccSFangrui Song uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { 5605*207e5cccSFangrui Song return vmlsq_laneq_u32(a, b, v, 3); 5606*207e5cccSFangrui Song } 5607*207e5cccSFangrui Song 5608*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_laneq_s16( 5609*207e5cccSFangrui Song // CHECK-NEXT: entry: 5610*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5611*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5612*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5613*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5614*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8> 5615*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5616*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]]) 5617*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5618*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 5619*207e5cccSFangrui Song // 5620*207e5cccSFangrui Song int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { 5621*207e5cccSFangrui Song return vqdmlsl_laneq_s16(a, b, v, 7); 5622*207e5cccSFangrui Song } 5623*207e5cccSFangrui Song 5624*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_laneq_s32( 5625*207e5cccSFangrui Song // CHECK-NEXT: entry: 5626*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5627*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5628*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5629*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5630*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8> 5631*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5632*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]]) 5633*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5634*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 5635*207e5cccSFangrui Song // 5636*207e5cccSFangrui Song int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { 5637*207e5cccSFangrui Song return vqdmlsl_laneq_s32(a, b, v, 3); 5638*207e5cccSFangrui Song } 5639*207e5cccSFangrui Song 5640*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_laneq_s16( 
5641*207e5cccSFangrui Song // CHECK-NEXT: entry: 5642*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> [[B]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5643*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5644*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5645*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 5646*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5647*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 5648*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8> 5649*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I]], <4 x i16> [[LANE]]) 5650*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]]) 5651*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMLSL_V3_I]] 5652*207e5cccSFangrui Song // 5653*207e5cccSFangrui Song int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { 5654*207e5cccSFangrui Song return vqdmlsl_high_laneq_s16(a, b, v, 7); 5655*207e5cccSFangrui Song } 5656*207e5cccSFangrui Song 5657*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmlsl_high_laneq_s32( 5658*207e5cccSFangrui Song // CHECK-NEXT: entry: 5659*207e5cccSFangrui Song // CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[B]], <2 x i32> <i32 2, i32 3> 5660*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5661*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5662*207e5cccSFangrui Song // CHECK-NEXT: [[LANE:%.*]] = 
shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 5663*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8> 5664*207e5cccSFangrui Song // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 5665*207e5cccSFangrui Song // CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8> 5666*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I]], <2 x i32> [[LANE]]) 5667*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]]) 5668*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VQDMLSL_V3_I]] 5669*207e5cccSFangrui Song // 5670*207e5cccSFangrui Song int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { 5671*207e5cccSFangrui Song return vqdmlsl_high_laneq_s32(a, b, v, 3); 5672*207e5cccSFangrui Song } 5673*207e5cccSFangrui Song 5674*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_laneq_s16( 5675*207e5cccSFangrui Song // CHECK-NEXT: entry: 5676*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 5677*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5678*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5679*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5680*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v4i16.v8i16(<4 x i16> [[VQDMULH_LANEQ_V]], <8 x i16> [[VQDMULH_LANEQ_V1]], i32 7) 5681*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i16> [[VQDMULH_LANEQ_V2]] 5682*207e5cccSFangrui Song // 5683*207e5cccSFangrui Song int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { 5684*207e5cccSFangrui Song return vqdmulh_laneq_s16(a, v, 
7); 5685*207e5cccSFangrui Song } 5686*207e5cccSFangrui Song 5687*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_laneq_s16( 5688*207e5cccSFangrui Song // CHECK-NEXT: entry: 5689*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 5690*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5691*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5692*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5693*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.laneq.v8i16.v8i16(<8 x i16> [[VQDMULHQ_LANEQ_V]], <8 x i16> [[VQDMULHQ_LANEQ_V1]], i32 7) 5694*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQDMULHQ_LANEQ_V2]] 5695*207e5cccSFangrui Song // 5696*207e5cccSFangrui Song int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { 5697*207e5cccSFangrui Song return vqdmulhq_laneq_s16(a, v, 7); 5698*207e5cccSFangrui Song } 5699*207e5cccSFangrui Song 5700*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulh_laneq_s32( 5701*207e5cccSFangrui Song // CHECK-NEXT: entry: 5702*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 5703*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5704*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5705*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5706*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULH_LANEQ_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v2i32.v4i32(<2 x i32> [[VQDMULH_LANEQ_V]], <4 x i32> [[VQDMULH_LANEQ_V1]], i32 3) 5707*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQDMULH_LANEQ_V2]] 5708*207e5cccSFangrui Song // 5709*207e5cccSFangrui Song int32x2_t 
test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { 5710*207e5cccSFangrui Song return vqdmulh_laneq_s32(a, v, 3); 5711*207e5cccSFangrui Song } 5712*207e5cccSFangrui Song 5713*207e5cccSFangrui Song // CHECK-LABEL: @test_vqdmulhq_laneq_s32( 5714*207e5cccSFangrui Song // CHECK-NEXT: entry: 5715*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5716*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5717*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5718*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5719*207e5cccSFangrui Song // CHECK-NEXT: [[VQDMULHQ_LANEQ_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.laneq.v4i32.v4i32(<4 x i32> [[VQDMULHQ_LANEQ_V]], <4 x i32> [[VQDMULHQ_LANEQ_V1]], i32 3) 5720*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQDMULHQ_LANEQ_V2]] 5721*207e5cccSFangrui Song // 5722*207e5cccSFangrui Song int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { 5723*207e5cccSFangrui Song return vqdmulhq_laneq_s32(a, v, 3); 5724*207e5cccSFangrui Song } 5725*207e5cccSFangrui Song 5726*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_laneq_s16( 5727*207e5cccSFangrui Song // CHECK-NEXT: entry: 5728*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8> 5729*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5730*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5731*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5732*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V2:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v4i16.v8i16(<4 x i16> [[VQRDMULH_LANEQ_V]], <8 x i16> [[VQRDMULH_LANEQ_V1]], i32 7) 5733*207e5cccSFangrui Song // CHECK-NEXT: ret 
<4 x i16> [[VQRDMULH_LANEQ_V2]] 5734*207e5cccSFangrui Song // 5735*207e5cccSFangrui Song int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { 5736*207e5cccSFangrui Song return vqrdmulh_laneq_s16(a, v, 7); 5737*207e5cccSFangrui Song } 5738*207e5cccSFangrui Song 5739*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_laneq_s16( 5740*207e5cccSFangrui Song // CHECK-NEXT: entry: 5741*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8> 5742*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 5743*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5744*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5745*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V2:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.laneq.v8i16.v8i16(<8 x i16> [[VQRDMULHQ_LANEQ_V]], <8 x i16> [[VQRDMULHQ_LANEQ_V1]], i32 7) 5746*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VQRDMULHQ_LANEQ_V2]] 5747*207e5cccSFangrui Song // 5748*207e5cccSFangrui Song int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { 5749*207e5cccSFangrui Song return vqrdmulhq_laneq_s16(a, v, 7); 5750*207e5cccSFangrui Song } 5751*207e5cccSFangrui Song 5752*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulh_laneq_s32( 5753*207e5cccSFangrui Song // CHECK-NEXT: entry: 5754*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> 5755*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5756*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5757*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5758*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULH_LANEQ_V2:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v2i32.v4i32(<2 x 
i32> [[VQRDMULH_LANEQ_V]], <4 x i32> [[VQRDMULH_LANEQ_V1]], i32 3) 5759*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i32> [[VQRDMULH_LANEQ_V2]] 5760*207e5cccSFangrui Song // 5761*207e5cccSFangrui Song int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { 5762*207e5cccSFangrui Song return vqrdmulh_laneq_s32(a, v, 3); 5763*207e5cccSFangrui Song } 5764*207e5cccSFangrui Song 5765*207e5cccSFangrui Song // CHECK-LABEL: @test_vqrdmulhq_laneq_s32( 5766*207e5cccSFangrui Song // CHECK-NEXT: entry: 5767*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> 5768*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 5769*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5770*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5771*207e5cccSFangrui Song // CHECK-NEXT: [[VQRDMULHQ_LANEQ_V2:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.laneq.v4i32.v4i32(<4 x i32> [[VQRDMULHQ_LANEQ_V]], <4 x i32> [[VQRDMULHQ_LANEQ_V1]], i32 3) 5772*207e5cccSFangrui Song // CHECK-NEXT: ret <4 x i32> [[VQRDMULHQ_LANEQ_V2]] 5773*207e5cccSFangrui Song // 5774*207e5cccSFangrui Song int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { 5775*207e5cccSFangrui Song return vqrdmulhq_laneq_s32(a, v, 3); 5776*207e5cccSFangrui Song } 5777