1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64 -target-feature +neon \ 3*207e5cccSFangrui Song // RUN: -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \ 4*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | FileCheck %s 5*207e5cccSFangrui Song 6*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 7*207e5cccSFangrui Song 8*207e5cccSFangrui Song #include <arm_neon.h> 9*207e5cccSFangrui Song 10*207e5cccSFangrui Song 11*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_u64( 12*207e5cccSFangrui Song // CHECK-NEXT: entry: 13*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 14*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 15*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 16*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 17*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] 18*207e5cccSFangrui Song // 19*207e5cccSFangrui Song uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) { 20*207e5cccSFangrui Song return vldap1q_lane_u64(a, b, 1); 21*207e5cccSFangrui Song } 22*207e5cccSFangrui Song 23*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_s64( 24*207e5cccSFangrui Song // CHECK-NEXT: entry: 25*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 26*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 27*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 28*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 29*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] 30*207e5cccSFangrui Song // 31*207e5cccSFangrui Song int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) { 32*207e5cccSFangrui Song return vldap1q_lane_s64(a, b, 1); 33*207e5cccSFangrui Song } 34*207e5cccSFangrui Song 35*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_f64( 36*207e5cccSFangrui Song // CHECK-NEXT: entry: 37*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> 38*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 39*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8 40*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1 41*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x double> [[VLDAP1_LANE]] 42*207e5cccSFangrui Song // 43*207e5cccSFangrui Song float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) { 44*207e5cccSFangrui Song return vldap1q_lane_f64(a, b, 1); 45*207e5cccSFangrui Song } 46*207e5cccSFangrui Song 47*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_p64( 48*207e5cccSFangrui Song // CHECK-NEXT: entry: 49*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 50*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 51*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 52*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1 53*207e5cccSFangrui Song // CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]] 54*207e5cccSFangrui Song // 55*207e5cccSFangrui Song poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) { 56*207e5cccSFangrui Song return vldap1q_lane_p64(a, b, 1); 57*207e5cccSFangrui Song } 58*207e5cccSFangrui Song 59*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_u64( 60*207e5cccSFangrui Song // CHECK-NEXT: entry: 61*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 62*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 63*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 64*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 65*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] 66*207e5cccSFangrui Song // 67*207e5cccSFangrui Song uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) { 68*207e5cccSFangrui Song return vldap1_lane_u64(a, b, 0); 69*207e5cccSFangrui Song } 70*207e5cccSFangrui Song 71*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_s64( 72*207e5cccSFangrui Song // CHECK-NEXT: entry: 73*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 74*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 75*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 76*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 77*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] 78*207e5cccSFangrui Song // 79*207e5cccSFangrui Song int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) { 80*207e5cccSFangrui Song return vldap1_lane_s64(a, b, 0); 81*207e5cccSFangrui Song } 82*207e5cccSFangrui Song 83*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_f64( 84*207e5cccSFangrui Song // CHECK-NEXT: entry: 85*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8> 86*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 87*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8 88*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0 89*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x double> [[VLDAP1_LANE]] 90*207e5cccSFangrui Song // 91*207e5cccSFangrui Song float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) { 92*207e5cccSFangrui Song return vldap1_lane_f64(a, b, 0); 93*207e5cccSFangrui Song } 94*207e5cccSFangrui Song 95*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_p64( 96*207e5cccSFangrui Song // CHECK-NEXT: entry: 97*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 98*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 99*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8 100*207e5cccSFangrui Song // CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0 101*207e5cccSFangrui Song // CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]] 102*207e5cccSFangrui Song // 103*207e5cccSFangrui Song poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) { 104*207e5cccSFangrui Song return vldap1_lane_p64(a, b, 0); 105*207e5cccSFangrui Song } 106*207e5cccSFangrui Song 107*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_u64( 108*207e5cccSFangrui Song // CHECK-NEXT: entry: 109*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 110*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 111*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 112*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 113*207e5cccSFangrui Song // CHECK-NEXT: ret void 114*207e5cccSFangrui Song // 115*207e5cccSFangrui Song void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) { 116*207e5cccSFangrui Song vstl1q_lane_u64(a, b, 1); 117*207e5cccSFangrui Song } 118*207e5cccSFangrui Song 119*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_s64( 120*207e5cccSFangrui Song // CHECK-NEXT: entry: 121*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 122*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 123*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 124*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 125*207e5cccSFangrui Song // CHECK-NEXT: ret void 126*207e5cccSFangrui Song // 127*207e5cccSFangrui Song void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) { 128*207e5cccSFangrui Song vstl1q_lane_s64(a, b, 1); 129*207e5cccSFangrui Song } 130*207e5cccSFangrui Song 131*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_f64( 132*207e5cccSFangrui Song // CHECK-NEXT: entry: 133*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8> 134*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> 135*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 136*207e5cccSFangrui Song // CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8 137*207e5cccSFangrui Song // CHECK-NEXT: ret void 138*207e5cccSFangrui Song // 139*207e5cccSFangrui Song void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) { 140*207e5cccSFangrui Song vstl1q_lane_f64(a, b, 1); 141*207e5cccSFangrui Song } 142*207e5cccSFangrui Song 143*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_p64( 144*207e5cccSFangrui Song // CHECK-NEXT: entry: 145*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8> 146*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 147*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 148*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 149*207e5cccSFangrui Song // CHECK-NEXT: ret void 150*207e5cccSFangrui Song // 151*207e5cccSFangrui Song void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) { 152*207e5cccSFangrui Song vstl1q_lane_p64(a, b, 1); 153*207e5cccSFangrui Song } 154*207e5cccSFangrui Song 155*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_u64( 156*207e5cccSFangrui Song // CHECK-NEXT: entry: 157*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 158*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 159*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 160*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 161*207e5cccSFangrui Song // CHECK-NEXT: ret void 162*207e5cccSFangrui Song // 163*207e5cccSFangrui Song void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) { 164*207e5cccSFangrui Song vstl1_lane_u64(a, b, 0); 165*207e5cccSFangrui Song } 166*207e5cccSFangrui Song 167*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_s64( 168*207e5cccSFangrui Song // CHECK-NEXT: entry: 169*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 170*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 171*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 172*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 173*207e5cccSFangrui Song // CHECK-NEXT: ret void 174*207e5cccSFangrui Song // 175*207e5cccSFangrui Song void test_vstl1_lane_s64(int64_t *a, int64x1_t b) { 176*207e5cccSFangrui Song vstl1_lane_s64(a, b, 0); 177*207e5cccSFangrui Song } 178*207e5cccSFangrui Song 179*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_f64( 180*207e5cccSFangrui Song // CHECK-NEXT: entry: 181*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8> 182*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> 183*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 184*207e5cccSFangrui Song // CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8 185*207e5cccSFangrui Song // CHECK-NEXT: ret void 186*207e5cccSFangrui Song // 187*207e5cccSFangrui Song void test_vstl1_lane_f64(float64_t *a, float64x1_t b) { 188*207e5cccSFangrui Song vstl1_lane_f64(a, b, 0); 189*207e5cccSFangrui Song } 190*207e5cccSFangrui Song 191*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_p64( 192*207e5cccSFangrui Song // CHECK-NEXT: entry: 193*207e5cccSFangrui Song // CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8> 194*207e5cccSFangrui Song // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 195*207e5cccSFangrui Song // CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 196*207e5cccSFangrui Song // CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8 197*207e5cccSFangrui Song // CHECK-NEXT: ret void 198*207e5cccSFangrui Song // 199*207e5cccSFangrui Song void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) { 200*207e5cccSFangrui Song vstl1_lane_p64(a, b, 0); 201*207e5cccSFangrui Song } 202