1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 2*207e5cccSFangrui Song // RUN: -disable-O0-optnone -emit-llvm -o - %s \ 3*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | FileCheck %s 4*207e5cccSFangrui Song 5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target || arm-registered-target 6*207e5cccSFangrui Song 7*207e5cccSFangrui Song #include <arm_neon.h> 8*207e5cccSFangrui Song 9*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_u8(ptr noundef %a) #0 { 10*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 11*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 12*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 13*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[LANE]] 14*207e5cccSFangrui Song uint8x16_t test_vld1q_dup_u8(uint8_t *a) { 15*207e5cccSFangrui Song return vld1q_dup_u8(a); 16*207e5cccSFangrui Song } 17*207e5cccSFangrui Song 18*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_u16(ptr noundef %a) #0 { 19*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 20*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 21*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 22*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[LANE]] 23*207e5cccSFangrui Song uint16x8_t test_vld1q_dup_u16(uint16_t *a) { 24*207e5cccSFangrui Song return vld1q_dup_u16(a); 25*207e5cccSFangrui Song } 26*207e5cccSFangrui Song 27*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_dup_u32(ptr noundef %a) #0 { 28*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i32, ptr %a 29*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 
30*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 31*207e5cccSFangrui Song // CHECK: ret <4 x i32> [[LANE]] 32*207e5cccSFangrui Song uint32x4_t test_vld1q_dup_u32(uint32_t *a) { 33*207e5cccSFangrui Song return vld1q_dup_u32(a); 34*207e5cccSFangrui Song } 35*207e5cccSFangrui Song 36*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_u64(ptr noundef %a) #0 { 37*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 38*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 39*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 40*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[LANE]] 41*207e5cccSFangrui Song uint64x2_t test_vld1q_dup_u64(uint64_t *a) { 42*207e5cccSFangrui Song return vld1q_dup_u64(a); 43*207e5cccSFangrui Song } 44*207e5cccSFangrui Song 45*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_s8(ptr noundef %a) #0 { 46*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 47*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 48*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 49*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[LANE]] 50*207e5cccSFangrui Song int8x16_t test_vld1q_dup_s8(int8_t *a) { 51*207e5cccSFangrui Song return vld1q_dup_s8(a); 52*207e5cccSFangrui Song } 53*207e5cccSFangrui Song 54*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_s16(ptr noundef %a) #0 { 55*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 56*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 57*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], 
<8 x i32> zeroinitializer 58*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[LANE]] 59*207e5cccSFangrui Song int16x8_t test_vld1q_dup_s16(int16_t *a) { 60*207e5cccSFangrui Song return vld1q_dup_s16(a); 61*207e5cccSFangrui Song } 62*207e5cccSFangrui Song 63*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_dup_s32(ptr noundef %a) #0 { 64*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i32, ptr %a 65*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0 66*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 67*207e5cccSFangrui Song // CHECK: ret <4 x i32> [[LANE]] 68*207e5cccSFangrui Song int32x4_t test_vld1q_dup_s32(int32_t *a) { 69*207e5cccSFangrui Song return vld1q_dup_s32(a); 70*207e5cccSFangrui Song } 71*207e5cccSFangrui Song 72*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_s64(ptr noundef %a) #0 { 73*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 74*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 75*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 76*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[LANE]] 77*207e5cccSFangrui Song int64x2_t test_vld1q_dup_s64(int64_t *a) { 78*207e5cccSFangrui Song return vld1q_dup_s64(a); 79*207e5cccSFangrui Song } 80*207e5cccSFangrui Song 81*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x half> @test_vld1q_dup_f16(ptr noundef %a) #0 { 82*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load half, ptr %a 83*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <8 x half> poison, half [[TMP2]], i32 0 84*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer 85*207e5cccSFangrui Song // CHECK: ret <8 x half> [[LANE]] 
86*207e5cccSFangrui Song float16x8_t test_vld1q_dup_f16(float16_t *a) { 87*207e5cccSFangrui Song return vld1q_dup_f16(a); 88*207e5cccSFangrui Song } 89*207e5cccSFangrui Song 90*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x float> @test_vld1q_dup_f32(ptr noundef %a) #0 { 91*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load float, ptr %a 92*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 93*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer 94*207e5cccSFangrui Song // CHECK: ret <4 x float> [[LANE]] 95*207e5cccSFangrui Song float32x4_t test_vld1q_dup_f32(float32_t *a) { 96*207e5cccSFangrui Song return vld1q_dup_f32(a); 97*207e5cccSFangrui Song } 98*207e5cccSFangrui Song 99*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x double> @test_vld1q_dup_f64(ptr noundef %a) #0 { 100*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load double, ptr %a 101*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 102*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP3]], <2 x i32> zeroinitializer 103*207e5cccSFangrui Song // CHECK: ret <2 x double> [[LANE]] 104*207e5cccSFangrui Song float64x2_t test_vld1q_dup_f64(float64_t *a) { 105*207e5cccSFangrui Song return vld1q_dup_f64(a); 106*207e5cccSFangrui Song } 107*207e5cccSFangrui Song 108*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_dup_p8(ptr noundef %a) #0 { 109*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 110*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0 111*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer 112*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[LANE]] 113*207e5cccSFangrui Song poly8x16_t 
test_vld1q_dup_p8(poly8_t *a) { 114*207e5cccSFangrui Song return vld1q_dup_p8(a); 115*207e5cccSFangrui Song } 116*207e5cccSFangrui Song 117*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_dup_p16(ptr noundef %a) #0 { 118*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 119*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i32 0 120*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer 121*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[LANE]] 122*207e5cccSFangrui Song poly16x8_t test_vld1q_dup_p16(poly16_t *a) { 123*207e5cccSFangrui Song return vld1q_dup_p16(a); 124*207e5cccSFangrui Song } 125*207e5cccSFangrui Song 126*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_dup_p64(ptr noundef %a) #0 { 127*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 128*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0 129*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer 130*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[LANE]] 131*207e5cccSFangrui Song poly64x2_t test_vld1q_dup_p64(poly64_t *a) { 132*207e5cccSFangrui Song return vld1q_dup_p64(a); 133*207e5cccSFangrui Song } 134*207e5cccSFangrui Song 135*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_u8(ptr noundef %a) #0 { 136*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 137*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0 138*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 139*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[LANE]] 140*207e5cccSFangrui Song uint8x8_t test_vld1_dup_u8(uint8_t *a) { 141*207e5cccSFangrui Song return vld1_dup_u8(a); 
142*207e5cccSFangrui Song } 143*207e5cccSFangrui Song 144*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_u16(ptr noundef %a) #0 { 145*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 146*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0 147*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 148*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[LANE]] 149*207e5cccSFangrui Song uint16x4_t test_vld1_dup_u16(uint16_t *a) { 150*207e5cccSFangrui Song return vld1_dup_u16(a); 151*207e5cccSFangrui Song } 152*207e5cccSFangrui Song 153*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_dup_u32(ptr noundef %a) #0 { 154*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i32, ptr %a 155*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 156*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 157*207e5cccSFangrui Song // CHECK: ret <2 x i32> [[LANE]] 158*207e5cccSFangrui Song uint32x2_t test_vld1_dup_u32(uint32_t *a) { 159*207e5cccSFangrui Song return vld1_dup_u32(a); 160*207e5cccSFangrui Song } 161*207e5cccSFangrui Song 162*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_u64(ptr noundef %a) #0 { 163*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 164*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0 165*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 166*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[LANE]] 167*207e5cccSFangrui Song uint64x1_t test_vld1_dup_u64(uint64_t *a) { 168*207e5cccSFangrui Song return vld1_dup_u64(a); 169*207e5cccSFangrui Song } 170*207e5cccSFangrui Song 171*207e5cccSFangrui Song 
// CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_s8(ptr noundef %a) #0 { 172*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 173*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0 174*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 175*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[LANE]] 176*207e5cccSFangrui Song int8x8_t test_vld1_dup_s8(int8_t *a) { 177*207e5cccSFangrui Song return vld1_dup_s8(a); 178*207e5cccSFangrui Song } 179*207e5cccSFangrui Song 180*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_s16(ptr noundef %a) #0 { 181*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 182*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0 183*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 184*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[LANE]] 185*207e5cccSFangrui Song int16x4_t test_vld1_dup_s16(int16_t *a) { 186*207e5cccSFangrui Song return vld1_dup_s16(a); 187*207e5cccSFangrui Song } 188*207e5cccSFangrui Song 189*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_dup_s32(ptr noundef %a) #0 { 190*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i32, ptr %a 191*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0 192*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer 193*207e5cccSFangrui Song // CHECK: ret <2 x i32> [[LANE]] 194*207e5cccSFangrui Song int32x2_t test_vld1_dup_s32(int32_t *a) { 195*207e5cccSFangrui Song return vld1_dup_s32(a); 196*207e5cccSFangrui Song } 197*207e5cccSFangrui Song 198*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_s64(ptr noundef %a) #0 { 
199*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 200*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <1 x i64> poison, i64 [[TMP2]], i32 0 201*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 202*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[LANE]] 203*207e5cccSFangrui Song int64x1_t test_vld1_dup_s64(int64_t *a) { 204*207e5cccSFangrui Song return vld1_dup_s64(a); 205*207e5cccSFangrui Song } 206*207e5cccSFangrui Song 207*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x half> @test_vld1_dup_f16(ptr noundef %a) #0 { 208*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load half, ptr %a 209*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x half> poison, half [[TMP2]], i32 0 210*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer 211*207e5cccSFangrui Song // CHECK: ret <4 x half> [[LANE]] 212*207e5cccSFangrui Song float16x4_t test_vld1_dup_f16(float16_t *a) { 213*207e5cccSFangrui Song return vld1_dup_f16(a); 214*207e5cccSFangrui Song } 215*207e5cccSFangrui Song 216*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x float> @test_vld1_dup_f32(ptr noundef %a) #0 { 217*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load float, ptr %a 218*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 219*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer 220*207e5cccSFangrui Song // CHECK: ret <2 x float> [[LANE]] 221*207e5cccSFangrui Song float32x2_t test_vld1_dup_f32(float32_t *a) { 222*207e5cccSFangrui Song return vld1_dup_f32(a); 223*207e5cccSFangrui Song } 224*207e5cccSFangrui Song 225*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x double> @test_vld1_dup_f64(ptr noundef %a) #0 { 226*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = 
load double, ptr %a 227*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <1 x double> poison, double [[TMP2]], i32 0 228*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer 229*207e5cccSFangrui Song // CHECK: ret <1 x double> [[LANE]] 230*207e5cccSFangrui Song float64x1_t test_vld1_dup_f64(float64_t *a) { 231*207e5cccSFangrui Song return vld1_dup_f64(a); 232*207e5cccSFangrui Song } 233*207e5cccSFangrui Song 234*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_dup_p8(ptr noundef %a) #0 { 235*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 236*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0 237*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer 238*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[LANE]] 239*207e5cccSFangrui Song poly8x8_t test_vld1_dup_p8(poly8_t *a) { 240*207e5cccSFangrui Song return vld1_dup_p8(a); 241*207e5cccSFangrui Song } 242*207e5cccSFangrui Song 243*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_dup_p16(ptr noundef %a) #0 { 244*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i16, ptr %a 245*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i32 0 246*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer 247*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[LANE]] 248*207e5cccSFangrui Song poly16x4_t test_vld1_dup_p16(poly16_t *a) { 249*207e5cccSFangrui Song return vld1_dup_p16(a); 250*207e5cccSFangrui Song } 251*207e5cccSFangrui Song 252*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_dup_p64(ptr noundef %a) #0 { 253*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load i64, ptr %a 254*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = 
insertelement <1 x i64> poison, i64 [[TMP2]], i32 0 255*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 256*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[LANE]] 257*207e5cccSFangrui Song poly64x1_t test_vld1_dup_p64(poly64_t *a) { 258*207e5cccSFangrui Song return vld1_dup_p64(a); 259*207e5cccSFangrui Song } 260*207e5cccSFangrui Song 261*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_dup_u64(ptr noundef %a) #0 { 262*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 263*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 264*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a) 265*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 266*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 267*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16 268*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x2_t [[TMP6]] 269*207e5cccSFangrui Song uint64x2x2_t test_vld2q_dup_u64(uint64_t *a) { 270*207e5cccSFangrui Song return vld2q_dup_u64(a); 271*207e5cccSFangrui Song } 272*207e5cccSFangrui Song 273*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_dup_s64(ptr noundef %a) #0 { 274*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 275*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 276*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a) 277*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 278*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr 
align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 279*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16 280*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x2_t [[TMP6]] 281*207e5cccSFangrui Song int64x2x2_t test_vld2q_dup_s64(int64_t *a) { 282*207e5cccSFangrui Song return vld2q_dup_s64(a); 283*207e5cccSFangrui Song } 284*207e5cccSFangrui Song 285*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_dup_f64(ptr noundef %a) #0 { 286*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 287*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 288*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %a) 289*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]] 290*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 291*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16 292*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x2_t [[TMP6]] 293*207e5cccSFangrui Song float64x2x2_t test_vld2q_dup_f64(float64_t *a) { 294*207e5cccSFangrui Song return vld2q_dup_f64(a); 295*207e5cccSFangrui Song } 296*207e5cccSFangrui Song 297*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_dup_p64(ptr noundef %a) #0 { 298*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 299*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 300*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %a) 301*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 302*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 303*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16 304*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x2_t [[TMP6]] 305*207e5cccSFangrui Song poly64x2x2_t test_vld2q_dup_p64(poly64_t *a) { 306*207e5cccSFangrui Song return vld2q_dup_p64(a); 307*207e5cccSFangrui Song } 308*207e5cccSFangrui Song 309*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_dup_f64(ptr noundef %a) #0 { 310*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 311*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 312*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %a) 313*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]] 314*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 315*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8 316*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x2_t [[TMP6]] 317*207e5cccSFangrui Song float64x1x2_t test_vld2_dup_f64(float64_t *a) { 318*207e5cccSFangrui Song return vld2_dup_f64(a); 319*207e5cccSFangrui Song } 320*207e5cccSFangrui Song 321*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_dup_p64(ptr noundef %a) #0 { 322*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 323*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 324*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %a) 325*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] 326*207e5cccSFangrui Song // CHECK: call 
void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 327*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8 328*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x2_t [[TMP6]] 329*207e5cccSFangrui Song poly64x1x2_t test_vld2_dup_p64(poly64_t *a) { 330*207e5cccSFangrui Song return vld2_dup_p64(a); 331*207e5cccSFangrui Song } 332*207e5cccSFangrui Song 333*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_dup_u64(ptr noundef %a) #0 { 334*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 335*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 336*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a) 337*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 338*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 339*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16 340*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x3_t [[TMP6]] 341*207e5cccSFangrui Song uint64x2x3_t test_vld3q_dup_u64(uint64_t *a) { 342*207e5cccSFangrui Song return vld3q_dup_u64(a); 343*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 344*207e5cccSFangrui Song } 345*207e5cccSFangrui Song 346*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_dup_s64(ptr noundef %a) #0 { 347*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 348*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 349*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a) 350*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x 
i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 351*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 352*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16 353*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x3_t [[TMP6]] 354*207e5cccSFangrui Song int64x2x3_t test_vld3q_dup_s64(int64_t *a) { 355*207e5cccSFangrui Song return vld3q_dup_s64(a); 356*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 357*207e5cccSFangrui Song } 358*207e5cccSFangrui Song 359*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_dup_f64(ptr noundef %a) #0 { 360*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 361*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 362*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %a) 363*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]] 364*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 365*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16 366*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x3_t [[TMP6]] 367*207e5cccSFangrui Song float64x2x3_t test_vld3q_dup_f64(float64_t *a) { 368*207e5cccSFangrui Song return vld3q_dup_f64(a); 369*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 370*207e5cccSFangrui Song } 371*207e5cccSFangrui Song 372*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_dup_p64(ptr noundef %a) #0 { 373*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 374*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 375*207e5cccSFangrui Song // CHECK: 
[[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %a) 376*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 377*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 378*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16 379*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x3_t [[TMP6]] 380*207e5cccSFangrui Song poly64x2x3_t test_vld3q_dup_p64(poly64_t *a) { 381*207e5cccSFangrui Song return vld3q_dup_p64(a); 382*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 383*207e5cccSFangrui Song } 384*207e5cccSFangrui Song 385*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_dup_f64(ptr noundef %a) #0 { 386*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 387*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 388*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %a) 389*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]] 390*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 391*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8 392*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x3_t [[TMP6]] 393*207e5cccSFangrui Song float64x1x3_t test_vld3_dup_f64(float64_t *a) { 394*207e5cccSFangrui Song return vld3_dup_f64(a); 395*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 396*207e5cccSFangrui Song } 397*207e5cccSFangrui Song 398*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_dup_p64(ptr noundef %a) #0 { 399*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca 
%struct.poly64x1x3_t, align 8 400*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 401*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %a) 402*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] 403*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 404*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8 405*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x3_t [[TMP6]] 406*207e5cccSFangrui Song poly64x1x3_t test_vld3_dup_p64(poly64_t *a) { 407*207e5cccSFangrui Song return vld3_dup_p64(a); 408*207e5cccSFangrui Song // [{{x[0-9]+|sp}}] 409*207e5cccSFangrui Song } 410*207e5cccSFangrui Song 411*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_dup_u64(ptr noundef %a) #0 { 412*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 413*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 414*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a) 415*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] 416*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 417*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16 418*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x4_t [[TMP6]] 419*207e5cccSFangrui Song uint64x2x4_t test_vld4q_dup_u64(uint64_t *a) { 420*207e5cccSFangrui Song return vld4q_dup_u64(a); 421*207e5cccSFangrui Song } 422*207e5cccSFangrui Song 423*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x4_t 
@test_vld4q_dup_s64(ptr noundef %a) #0 { 424*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 425*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 426*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a) 427*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] 428*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 429*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16 430*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x4_t [[TMP6]] 431*207e5cccSFangrui Song int64x2x4_t test_vld4q_dup_s64(int64_t *a) { 432*207e5cccSFangrui Song return vld4q_dup_s64(a); 433*207e5cccSFangrui Song } 434*207e5cccSFangrui Song 435*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_dup_f64(ptr noundef %a) #0 { 436*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 437*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 438*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %a) 439*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]] 440*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 441*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16 442*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x4_t [[TMP6]] 443*207e5cccSFangrui Song float64x2x4_t test_vld4q_dup_f64(float64_t *a) { 444*207e5cccSFangrui Song return vld4q_dup_f64(a); 445*207e5cccSFangrui 
Song } 446*207e5cccSFangrui Song 447*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_dup_p64(ptr noundef %a) #0 { 448*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 449*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 450*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %a) 451*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] 452*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 453*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16 454*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x4_t [[TMP6]] 455*207e5cccSFangrui Song poly64x2x4_t test_vld4q_dup_p64(poly64_t *a) { 456*207e5cccSFangrui Song return vld4q_dup_p64(a); 457*207e5cccSFangrui Song } 458*207e5cccSFangrui Song 459*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_dup_f64(ptr noundef %a) #0 { 460*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 461*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 462*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %a) 463*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]] 464*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 465*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8 466*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x4_t [[TMP6]] 467*207e5cccSFangrui Song 
float64x1x4_t test_vld4_dup_f64(float64_t *a) { 468*207e5cccSFangrui Song return vld4_dup_f64(a); 469*207e5cccSFangrui Song } 470*207e5cccSFangrui Song 471*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_dup_p64(ptr noundef %a) #0 { 472*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 473*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 474*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %a) 475*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]] 476*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 477*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8 478*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x4_t [[TMP6]] 479*207e5cccSFangrui Song poly64x1x4_t test_vld4_dup_p64(poly64_t *a) { 480*207e5cccSFangrui Song return vld4_dup_p64(a); 481*207e5cccSFangrui Song } 482*207e5cccSFangrui Song 483*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_u8(ptr noundef %a, <16 x i8> noundef %b) #0 { 484*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 485*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 486*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[VLD1_LANE]] 487*207e5cccSFangrui Song uint8x16_t test_vld1q_lane_u8(uint8_t *a, uint8x16_t b) { 488*207e5cccSFangrui Song return vld1q_lane_u8(a, b, 15); 489*207e5cccSFangrui Song } 490*207e5cccSFangrui Song 491*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_u16(ptr noundef %a, <8 x i16> noundef %b) #0 { 492*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 493*207e5cccSFangrui Song // CHECK: 
[[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 494*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i16, ptr %a 495*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 496*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[VLD1_LANE]] 497*207e5cccSFangrui Song uint16x8_t test_vld1q_lane_u16(uint16_t *a, uint16x8_t b) { 498*207e5cccSFangrui Song return vld1q_lane_u16(a, b, 7); 499*207e5cccSFangrui Song } 500*207e5cccSFangrui Song 501*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_lane_u32(ptr noundef %a, <4 x i32> noundef %b) #0 { 502*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 503*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 504*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i32, ptr %a 505*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 506*207e5cccSFangrui Song // CHECK: ret <4 x i32> [[VLD1_LANE]] 507*207e5cccSFangrui Song uint32x4_t test_vld1q_lane_u32(uint32_t *a, uint32x4_t b) { 508*207e5cccSFangrui Song return vld1q_lane_u32(a, b, 3); 509*207e5cccSFangrui Song } 510*207e5cccSFangrui Song 511*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_u64(ptr noundef %a, <2 x i64> noundef %b) #0 { 512*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 513*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 514*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 515*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 516*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VLD1_LANE]] 517*207e5cccSFangrui Song uint64x2_t test_vld1q_lane_u64(uint64_t *a, uint64x2_t b) { 518*207e5cccSFangrui Song return vld1q_lane_u64(a, b, 1); 519*207e5cccSFangrui Song } 520*207e5cccSFangrui Song 
521*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_s8(ptr noundef %a, <16 x i8> noundef %b) #0 { 522*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 523*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 524*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[VLD1_LANE]] 525*207e5cccSFangrui Song int8x16_t test_vld1q_lane_s8(int8_t *a, int8x16_t b) { 526*207e5cccSFangrui Song return vld1q_lane_s8(a, b, 15); 527*207e5cccSFangrui Song } 528*207e5cccSFangrui Song 529*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_s16(ptr noundef %a, <8 x i16> noundef %b) #0 { 530*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 531*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 532*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i16, ptr %a 533*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 534*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[VLD1_LANE]] 535*207e5cccSFangrui Song int16x8_t test_vld1q_lane_s16(int16_t *a, int16x8_t b) { 536*207e5cccSFangrui Song return vld1q_lane_s16(a, b, 7); 537*207e5cccSFangrui Song } 538*207e5cccSFangrui Song 539*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i32> @test_vld1q_lane_s32(ptr noundef %a, <4 x i32> noundef %b) #0 { 540*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 541*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 542*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i32, ptr %a 543*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3 544*207e5cccSFangrui Song // CHECK: ret <4 x i32> [[VLD1_LANE]] 545*207e5cccSFangrui Song int32x4_t test_vld1q_lane_s32(int32_t *a, int32x4_t b) { 546*207e5cccSFangrui Song return vld1q_lane_s32(a, b, 3); 
547*207e5cccSFangrui Song } 548*207e5cccSFangrui Song 549*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_s64(ptr noundef %a, <2 x i64> noundef %b) #0 { 550*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 551*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 552*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 553*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 554*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VLD1_LANE]] 555*207e5cccSFangrui Song int64x2_t test_vld1q_lane_s64(int64_t *a, int64x2_t b) { 556*207e5cccSFangrui Song return vld1q_lane_s64(a, b, 1); 557*207e5cccSFangrui Song } 558*207e5cccSFangrui Song 559*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x half> @test_vld1q_lane_f16(ptr noundef %a, <8 x half> noundef %b) #0 { 560*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 561*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 562*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load half, ptr %a 563*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7 564*207e5cccSFangrui Song // CHECK: ret <8 x half> [[VLD1_LANE]] 565*207e5cccSFangrui Song float16x8_t test_vld1q_lane_f16(float16_t *a, float16x8_t b) { 566*207e5cccSFangrui Song return vld1q_lane_f16(a, b, 7); 567*207e5cccSFangrui Song } 568*207e5cccSFangrui Song 569*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x float> @test_vld1q_lane_f32(ptr noundef %a, <4 x float> noundef %b) #0 { 570*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 571*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 572*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load float, ptr %a 573*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = 
insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3 574*207e5cccSFangrui Song // CHECK: ret <4 x float> [[VLD1_LANE]] 575*207e5cccSFangrui Song float32x4_t test_vld1q_lane_f32(float32_t *a, float32x4_t b) { 576*207e5cccSFangrui Song return vld1q_lane_f32(a, b, 3); 577*207e5cccSFangrui Song } 578*207e5cccSFangrui Song 579*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x double> @test_vld1q_lane_f64(ptr noundef %a, <2 x double> noundef %b) #0 { 580*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 581*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 582*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load double, ptr %a 583*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP4]], i32 1 584*207e5cccSFangrui Song // CHECK: ret <2 x double> [[VLD1_LANE]] 585*207e5cccSFangrui Song float64x2_t test_vld1q_lane_f64(float64_t *a, float64x2_t b) { 586*207e5cccSFangrui Song return vld1q_lane_f64(a, b, 1); 587*207e5cccSFangrui Song } 588*207e5cccSFangrui Song 589*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <16 x i8> @test_vld1q_lane_p8(ptr noundef %a, <16 x i8> noundef %b) #0 { 590*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 591*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 592*207e5cccSFangrui Song // CHECK: ret <16 x i8> [[VLD1_LANE]] 593*207e5cccSFangrui Song poly8x16_t test_vld1q_lane_p8(poly8_t *a, poly8x16_t b) { 594*207e5cccSFangrui Song return vld1q_lane_p8(a, b, 15); 595*207e5cccSFangrui Song } 596*207e5cccSFangrui Song 597*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i16> @test_vld1q_lane_p16(ptr noundef %a, <8 x i16> noundef %b) #0 { 598*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 599*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 600*207e5cccSFangrui Song // CHECK: 
[[TMP4:%.*]] = load i16, ptr %a 601*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7 602*207e5cccSFangrui Song // CHECK: ret <8 x i16> [[VLD1_LANE]] 603*207e5cccSFangrui Song poly16x8_t test_vld1q_lane_p16(poly16_t *a, poly16x8_t b) { 604*207e5cccSFangrui Song return vld1q_lane_p16(a, b, 7); 605*207e5cccSFangrui Song } 606*207e5cccSFangrui Song 607*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_lane_p64(ptr noundef %a, <2 x i64> noundef %b) #0 { 608*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 609*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 610*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 611*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP4]], i32 1 612*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VLD1_LANE]] 613*207e5cccSFangrui Song poly64x2_t test_vld1q_lane_p64(poly64_t *a, poly64x2_t b) { 614*207e5cccSFangrui Song return vld1q_lane_p64(a, b, 1); 615*207e5cccSFangrui Song } 616*207e5cccSFangrui Song 617*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_lane_u8(ptr noundef %a, <8 x i8> noundef %b) #0 { 618*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 619*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 620*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[VLD1_LANE]] 621*207e5cccSFangrui Song uint8x8_t test_vld1_lane_u8(uint8_t *a, uint8x8_t b) { 622*207e5cccSFangrui Song return vld1_lane_u8(a, b, 7); 623*207e5cccSFangrui Song } 624*207e5cccSFangrui Song 625*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_u16(ptr noundef %a, <4 x i16> noundef %b) #0 { 626*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 627*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 
628*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i16, ptr %a 629*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 630*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[VLD1_LANE]] 631*207e5cccSFangrui Song uint16x4_t test_vld1_lane_u16(uint16_t *a, uint16x4_t b) { 632*207e5cccSFangrui Song return vld1_lane_u16(a, b, 3); 633*207e5cccSFangrui Song } 634*207e5cccSFangrui Song 635*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_lane_u32(ptr noundef %a, <2 x i32> noundef %b) #0 { 636*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 637*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 638*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i32, ptr %a 639*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1 640*207e5cccSFangrui Song // CHECK: ret <2 x i32> [[VLD1_LANE]] 641*207e5cccSFangrui Song uint32x2_t test_vld1_lane_u32(uint32_t *a, uint32x2_t b) { 642*207e5cccSFangrui Song return vld1_lane_u32(a, b, 1); 643*207e5cccSFangrui Song } 644*207e5cccSFangrui Song 645*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_lane_u64(ptr noundef %a, <1 x i64> noundef %b) #0 { 646*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 647*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 648*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 649*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 650*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VLD1_LANE]] 651*207e5cccSFangrui Song uint64x1_t test_vld1_lane_u64(uint64_t *a, uint64x1_t b) { 652*207e5cccSFangrui Song return vld1_lane_u64(a, b, 0); 653*207e5cccSFangrui Song } 654*207e5cccSFangrui Song 655*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i8> 
@test_vld1_lane_s8(ptr noundef %a, <8 x i8> noundef %b) #0 { 656*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 657*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 658*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[VLD1_LANE]] 659*207e5cccSFangrui Song int8x8_t test_vld1_lane_s8(int8_t *a, int8x8_t b) { 660*207e5cccSFangrui Song return vld1_lane_s8(a, b, 7); 661*207e5cccSFangrui Song } 662*207e5cccSFangrui Song 663*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_s16(ptr noundef %a, <4 x i16> noundef %b) #0 { 664*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 665*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 666*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i16, ptr %a 667*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3 668*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[VLD1_LANE]] 669*207e5cccSFangrui Song int16x4_t test_vld1_lane_s16(int16_t *a, int16x4_t b) { 670*207e5cccSFangrui Song return vld1_lane_s16(a, b, 3); 671*207e5cccSFangrui Song } 672*207e5cccSFangrui Song 673*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i32> @test_vld1_lane_s32(ptr noundef %a, <2 x i32> noundef %b) #0 { 674*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 675*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 676*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i32, ptr %a 677*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1 678*207e5cccSFangrui Song // CHECK: ret <2 x i32> [[VLD1_LANE]] 679*207e5cccSFangrui Song int32x2_t test_vld1_lane_s32(int32_t *a, int32x2_t b) { 680*207e5cccSFangrui Song return vld1_lane_s32(a, b, 1); 681*207e5cccSFangrui Song } 682*207e5cccSFangrui Song 683*207e5cccSFangrui Song // CHECK-LABEL: 
define{{.*}} <1 x i64> @test_vld1_lane_s64(ptr noundef %a, <1 x i64> noundef %b) #0 { 684*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 685*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 686*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 687*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 688*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VLD1_LANE]] 689*207e5cccSFangrui Song int64x1_t test_vld1_lane_s64(int64_t *a, int64x1_t b) { 690*207e5cccSFangrui Song return vld1_lane_s64(a, b, 0); 691*207e5cccSFangrui Song } 692*207e5cccSFangrui Song 693*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x half> @test_vld1_lane_f16(ptr noundef %a, <4 x half> noundef %b) #0 { 694*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 695*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 696*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load half, ptr %a 697*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3 698*207e5cccSFangrui Song // CHECK: ret <4 x half> [[VLD1_LANE]] 699*207e5cccSFangrui Song float16x4_t test_vld1_lane_f16(float16_t *a, float16x4_t b) { 700*207e5cccSFangrui Song return vld1_lane_f16(a, b, 3); 701*207e5cccSFangrui Song } 702*207e5cccSFangrui Song 703*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x float> @test_vld1_lane_f32(ptr noundef %a, <2 x float> noundef %b) #0 { 704*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 705*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 706*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load float, ptr %a 707*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1 708*207e5cccSFangrui Song // CHECK: ret <2 x float> 
[[VLD1_LANE]] 709*207e5cccSFangrui Song float32x2_t test_vld1_lane_f32(float32_t *a, float32x2_t b) { 710*207e5cccSFangrui Song return vld1_lane_f32(a, b, 1); 711*207e5cccSFangrui Song } 712*207e5cccSFangrui Song 713*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x double> @test_vld1_lane_f64(ptr noundef %a, <1 x double> noundef %b) #0 { 714*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 715*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 716*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load double, ptr %a 717*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x double> [[TMP2]], double [[TMP4]], i32 0 718*207e5cccSFangrui Song // CHECK: ret <1 x double> [[VLD1_LANE]] 719*207e5cccSFangrui Song float64x1_t test_vld1_lane_f64(float64_t *a, float64x1_t b) { 720*207e5cccSFangrui Song return vld1_lane_f64(a, b, 0); 721*207e5cccSFangrui Song } 722*207e5cccSFangrui Song 723*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <8 x i8> @test_vld1_lane_p8(ptr noundef %a, <8 x i8> noundef %b) #0 { 724*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = load i8, ptr %a 725*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 726*207e5cccSFangrui Song // CHECK: ret <8 x i8> [[VLD1_LANE]] 727*207e5cccSFangrui Song poly8x8_t test_vld1_lane_p8(poly8_t *a, poly8x8_t b) { 728*207e5cccSFangrui Song return vld1_lane_p8(a, b, 7); 729*207e5cccSFangrui Song } 730*207e5cccSFangrui Song 731*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <4 x i16> @test_vld1_lane_p16(ptr noundef %a, <4 x i16> noundef %b) #0 { 732*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 733*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 734*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i16, ptr %a 735*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 
[[TMP4]], i32 3 736*207e5cccSFangrui Song // CHECK: ret <4 x i16> [[VLD1_LANE]] 737*207e5cccSFangrui Song poly16x4_t test_vld1_lane_p16(poly16_t *a, poly16x4_t b) { 738*207e5cccSFangrui Song return vld1_lane_p16(a, b, 3); 739*207e5cccSFangrui Song } 740*207e5cccSFangrui Song 741*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_lane_p64(ptr noundef %a, <1 x i64> noundef %b) #0 { 742*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 743*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 744*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load i64, ptr %a 745*207e5cccSFangrui Song // CHECK: [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0 746*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VLD1_LANE]] 747*207e5cccSFangrui Song poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { 748*207e5cccSFangrui Song return vld1_lane_p64(a, b, 0); 749*207e5cccSFangrui Song } 750*207e5cccSFangrui Song 751*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x16x2_t @test_vld2q_lane_s8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 { 752*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 753*207e5cccSFangrui Song // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16 754*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 755*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 756*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[SRC]], i32 0, i32 0 757*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16 758*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false) 759*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, 
ptr [[__S1]], i32 0, i32 0 760*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 761*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 762*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 763*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 764*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 765*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr) 766*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]] 767*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 768*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16 769*207e5cccSFangrui Song // CHECK: ret %struct.int8x16x2_t [[TMP8]] 770*207e5cccSFangrui Song int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { 771*207e5cccSFangrui Song return vld2q_lane_s8(ptr, src, 15); 772*207e5cccSFangrui Song } 773*207e5cccSFangrui Song 774*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x16x2_t @test_vld2q_lane_u8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 { 775*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 776*207e5cccSFangrui Song // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16 777*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 778*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 779*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
%struct.uint8x16x2_t, ptr [[SRC]], i32 0, i32 0 780*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16 781*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false) 782*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 783*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 784*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 785*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 786*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 787*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 788*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr) 789*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]] 790*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 791*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16 792*207e5cccSFangrui Song // CHECK: ret %struct.uint8x16x2_t [[TMP8]] 793*207e5cccSFangrui Song uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { 794*207e5cccSFangrui Song return vld2q_lane_u8(ptr, src, 15); 795*207e5cccSFangrui Song } 796*207e5cccSFangrui Song 797*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x16x2_t @test_vld2q_lane_p8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 { 798*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca 
%struct.poly8x16x2_t, align 16 799*207e5cccSFangrui Song // CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16 800*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 801*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 802*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[SRC]], i32 0, i32 0 803*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16 804*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 32, i1 false) 805*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 806*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 807*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 808*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 809*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 810*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 811*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %ptr) 812*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], ptr [[__RET]] 813*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 814*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16 815*207e5cccSFangrui Song // CHECK: ret %struct.poly8x16x2_t [[TMP8]] 816*207e5cccSFangrui Song poly8x16x2_t 
test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { 817*207e5cccSFangrui Song return vld2q_lane_p8(ptr, src, 15); 818*207e5cccSFangrui Song } 819*207e5cccSFangrui Song 820*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x16x3_t @test_vld3q_lane_s8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 { 821*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 822*207e5cccSFangrui Song // CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16 823*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 824*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 825*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[SRC]], i32 0, i32 0 826*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16 827*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 48, i1 false) 828*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 829*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 830*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 831*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 832*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 833*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 834*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 835*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 
836*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 837*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %ptr) 838*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]] 839*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 840*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16 841*207e5cccSFangrui Song // CHECK: ret %struct.int8x16x3_t [[TMP9]] 842*207e5cccSFangrui Song int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { 843*207e5cccSFangrui Song return vld3q_lane_s8(ptr, src, 15); 844*207e5cccSFangrui Song } 845*207e5cccSFangrui Song 846*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x16x3_t @test_vld3q_lane_u8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 { 847*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 848*207e5cccSFangrui Song // CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16 849*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 850*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 851*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[SRC]], i32 0, i32 0 852*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[SRC]].coerce, ptr [[COERCE_DIVE]], align 16 853*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[SRC]], i64 48, i1 false) 854*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 855*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 856*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 857*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 858*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 859*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 860*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 861*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 862*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 863*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %ptr) 864*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]] 865*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 866*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16 867*207e5cccSFangrui Song // CHECK: ret %struct.uint8x16x3_t [[TMP9]] 868*207e5cccSFangrui Song uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { 869*207e5cccSFangrui Song return vld3q_lane_u8(ptr, src, 15); 870*207e5cccSFangrui Song } 871*207e5cccSFangrui Song 872*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x8x2_t @test_vld2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 { 873*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 874*207e5cccSFangrui Song // CHECK: 
[[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 875*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 876*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 877*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0 878*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 879*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 880*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 881*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 882*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 883*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 884*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 885*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 886*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 887*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 888*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 889*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 890*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a) 891*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]] 892*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 
[[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 893*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16 894*207e5cccSFangrui Song // CHECK: ret %struct.uint16x8x2_t [[TMP13]] 895*207e5cccSFangrui Song uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { 896*207e5cccSFangrui Song return vld2q_lane_u16(a, b, 7); 897*207e5cccSFangrui Song } 898*207e5cccSFangrui Song 899*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x4x2_t @test_vld2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 { 900*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 901*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 902*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 903*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 904*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0 905*207e5cccSFangrui Song // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 906*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 907*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 908*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 909*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 910*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 911*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 912*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 
913*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 914*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 915*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 916*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 917*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, ptr %a) 918*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], ptr [[__RET]] 919*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 920*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16 921*207e5cccSFangrui Song // CHECK: ret %struct.uint32x4x2_t [[TMP13]] 922*207e5cccSFangrui Song uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { 923*207e5cccSFangrui Song return vld2q_lane_u32(a, b, 3); 924*207e5cccSFangrui Song } 925*207e5cccSFangrui Song 926*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 927*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 928*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 929*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 930*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 931*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0 932*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 933*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr 
align 16 [[B]], i64 32, i1 false) 934*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 935*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 936*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 937*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 938*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 939*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 940*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 941*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 942*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 943*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 944*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a) 945*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]] 946*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 947*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16 948*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x2_t [[TMP13]] 949*207e5cccSFangrui Song uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { 950*207e5cccSFangrui Song return vld2q_lane_u64(a, b, 1); 951*207e5cccSFangrui Song } 952*207e5cccSFangrui Song 953*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int16x8x2_t @test_vld2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] 
alignstack(16) %b.coerce) #0 { 954*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 955*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 956*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 957*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 958*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0 959*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 960*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 961*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 962*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 963*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 964*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 965*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 966*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 967*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 968*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 969*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 970*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 971*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a) 972*207e5cccSFangrui Song // 
CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]] 973*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 974*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16 975*207e5cccSFangrui Song // CHECK: ret %struct.int16x8x2_t [[TMP13]] 976*207e5cccSFangrui Song int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { 977*207e5cccSFangrui Song return vld2q_lane_s16(a, b, 7); 978*207e5cccSFangrui Song } 979*207e5cccSFangrui Song 980*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x4x2_t @test_vld2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 { 981*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 982*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 983*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 984*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 985*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0 986*207e5cccSFangrui Song // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 987*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 988*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 989*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 990*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 991*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 992*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, 
i32 0 993*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 994*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 995*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 996*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 997*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 998*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, ptr %a) 999*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], ptr [[__RET]] 1000*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1001*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16 1002*207e5cccSFangrui Song // CHECK: ret %struct.int32x4x2_t [[TMP13]] 1003*207e5cccSFangrui Song int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { 1004*207e5cccSFangrui Song return vld2q_lane_s32(a, b, 3); 1005*207e5cccSFangrui Song } 1006*207e5cccSFangrui Song 1007*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 1008*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 1009*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 1010*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16 1011*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 1012*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0 1013*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] 
[[B]].coerce, ptr [[COERCE_DIVE]], align 16 1014*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1015*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 1016*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 1017*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 1018*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 1019*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 1020*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 1021*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 1022*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 1023*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 1024*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 1025*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a) 1026*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]] 1027*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1028*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16 1029*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x2_t [[TMP13]] 1030*207e5cccSFangrui Song int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { 1031*207e5cccSFangrui Song return vld2q_lane_s64(a, b, 1); 1032*207e5cccSFangrui Song } 
1033*207e5cccSFangrui Song 1034*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x8x2_t @test_vld2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 { 1035*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 1036*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 1037*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 1038*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 1039*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0 1040*207e5cccSFangrui Song // CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1041*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1042*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 1043*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0 1044*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 1045*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 1046*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 1047*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 1048*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 1049*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 1050*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half> 1051*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half> 
1052*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0(<8 x half> [[TMP8]], <8 x half> [[TMP9]], i64 7, ptr %a) 1053*207e5cccSFangrui Song // CHECK: store { <8 x half>, <8 x half> } [[VLD2_LANE]], ptr [[__RET]] 1054*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1055*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16 1056*207e5cccSFangrui Song // CHECK: ret %struct.float16x8x2_t [[TMP13]] 1057*207e5cccSFangrui Song float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { 1058*207e5cccSFangrui Song return vld2q_lane_f16(a, b, 7); 1059*207e5cccSFangrui Song } 1060*207e5cccSFangrui Song 1061*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x4x2_t @test_vld2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 { 1062*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 1063*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 1064*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 1065*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 1066*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0 1067*207e5cccSFangrui Song // CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1068*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1069*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 1070*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0 1071*207e5cccSFangrui Song // CHECK: 
[[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 1072*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 1073*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 1074*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 1075*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 1076*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 1077*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 1078*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 1079*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> [[TMP8]], <4 x float> [[TMP9]], i64 3, ptr %a) 1080*207e5cccSFangrui Song // CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], ptr [[__RET]] 1081*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1082*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16 1083*207e5cccSFangrui Song // CHECK: ret %struct.float32x4x2_t [[TMP13]] 1084*207e5cccSFangrui Song float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { 1085*207e5cccSFangrui Song return vld2q_lane_f32(a, b, 3); 1086*207e5cccSFangrui Song } 1087*207e5cccSFangrui Song 1088*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 { 1089*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 1090*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 1091*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = 
alloca %struct.float64x2x2_t, align 16 1092*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 1093*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0 1094*207e5cccSFangrui Song // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1095*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1096*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 1097*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0 1098*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 1099*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 1100*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 1101*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 1102*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 1103*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 1104*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 1105*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 1106*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> [[TMP8]], <2 x double> [[TMP9]], i64 1, ptr %a) 1107*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], ptr [[__RET]] 1108*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], 
i64 32, i1 false) 1109*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16 1110*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x2_t [[TMP13]] 1111*207e5cccSFangrui Song float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { 1112*207e5cccSFangrui Song return vld2q_lane_f64(a, b, 1); 1113*207e5cccSFangrui Song } 1114*207e5cccSFangrui Song 1115*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x8x2_t @test_vld2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 { 1116*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 1117*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 1118*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 1119*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 1120*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0 1121*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1122*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1123*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 1124*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 1125*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 1126*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 1127*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 1128*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 
1129*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 1130*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 1131*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 1132*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 1133*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, ptr %a) 1134*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], ptr [[__RET]] 1135*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1136*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16 1137*207e5cccSFangrui Song // CHECK: ret %struct.poly16x8x2_t [[TMP13]] 1138*207e5cccSFangrui Song poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { 1139*207e5cccSFangrui Song return vld2q_lane_p16(a, b, 7); 1140*207e5cccSFangrui Song } 1141*207e5cccSFangrui Song 1142*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 1143*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 1144*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 1145*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 1146*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 1147*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0 1148*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1149*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr 
align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 1150*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 1151*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 1152*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 1153*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 1154*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 1155*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 1156*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 1157*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 1158*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 1159*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 1160*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, ptr %a) 1161*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], ptr [[__RET]] 1162*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 1163*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16 1164*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x2_t [[TMP13]] 1165*207e5cccSFangrui Song poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { 1166*207e5cccSFangrui Song return vld2q_lane_p64(a, b, 1); 1167*207e5cccSFangrui Song } 1168*207e5cccSFangrui Song 1169*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x8x2_t 
@test_vld2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 1170*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 1171*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 1172*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 1173*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 1174*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0 1175*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1176*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1177*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 1178*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 1179*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 1180*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 1181*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 1182*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 1183*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 1184*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]] 1185*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1186*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8 
1187*207e5cccSFangrui Song // CHECK: ret %struct.uint8x8x2_t [[TMP8]] 1188*207e5cccSFangrui Song uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { 1189*207e5cccSFangrui Song return vld2_lane_u8(a, b, 7); 1190*207e5cccSFangrui Song } 1191*207e5cccSFangrui Song 1192*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x4x2_t @test_vld2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 1193*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 1194*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 1195*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 1196*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 1197*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0 1198*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1199*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1200*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 1201*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 1202*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 1203*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 1204*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 1205*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 1206*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 1207*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> 
[[TMP6]] to <8 x i8> 1208*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 1209*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 1210*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a) 1211*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]] 1212*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1213*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8 1214*207e5cccSFangrui Song // CHECK: ret %struct.uint16x4x2_t [[TMP13]] 1215*207e5cccSFangrui Song uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { 1216*207e5cccSFangrui Song return vld2_lane_u16(a, b, 3); 1217*207e5cccSFangrui Song } 1218*207e5cccSFangrui Song 1219*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x2x2_t @test_vld2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 { 1220*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 1221*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 1222*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 1223*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 1224*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0 1225*207e5cccSFangrui Song // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1226*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1227*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 
1228*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 1229*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 1230*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 1231*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 1232*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 1233*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 1234*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 1235*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 1236*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 1237*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, ptr %a) 1238*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], ptr [[__RET]] 1239*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1240*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8 1241*207e5cccSFangrui Song // CHECK: ret %struct.uint32x2x2_t [[TMP13]] 1242*207e5cccSFangrui Song uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { 1243*207e5cccSFangrui Song return vld2_lane_u32(a, b, 1); 1244*207e5cccSFangrui Song } 1245*207e5cccSFangrui Song 1246*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x1x2_t @test_vld2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 1247*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 1248*207e5cccSFangrui Song // 
CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 1249*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 1250*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 1251*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0 1252*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1253*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1254*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 1255*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 1256*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 1257*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 1258*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 1259*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 1260*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 1261*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 1262*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 1263*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 1264*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a) 1265*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]] 1266*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1267*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8 1268*207e5cccSFangrui Song // CHECK: ret %struct.uint64x1x2_t [[TMP13]] 1269*207e5cccSFangrui Song uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { 1270*207e5cccSFangrui Song return vld2_lane_u64(a, b, 0); 1271*207e5cccSFangrui Song } 1272*207e5cccSFangrui Song 1273*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x8x2_t @test_vld2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 1274*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 1275*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 1276*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 1277*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 1278*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0 1279*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1280*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1281*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 1282*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 1283*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 1284*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 1285*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 1286*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr 
[[ARRAYIDX2]], align 8 1287*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 1288*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]] 1289*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1290*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8 1291*207e5cccSFangrui Song // CHECK: ret %struct.int8x8x2_t [[TMP8]] 1292*207e5cccSFangrui Song int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { 1293*207e5cccSFangrui Song return vld2_lane_s8(a, b, 7); 1294*207e5cccSFangrui Song } 1295*207e5cccSFangrui Song 1296*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int16x4x2_t @test_vld2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 1297*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 1298*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 1299*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 1300*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 1301*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0 1302*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1303*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1304*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 1305*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 1306*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], 
align 8 1307*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 1308*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 1309*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 1310*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 1311*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 1312*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 1313*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 1314*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a) 1315*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]] 1316*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1317*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8 1318*207e5cccSFangrui Song // CHECK: ret %struct.int16x4x2_t [[TMP13]] 1319*207e5cccSFangrui Song int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { 1320*207e5cccSFangrui Song return vld2_lane_s16(a, b, 3); 1321*207e5cccSFangrui Song } 1322*207e5cccSFangrui Song 1323*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x2x2_t @test_vld2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 { 1324*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 1325*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 1326*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 1327*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca 
%struct.int32x2x2_t, align 8 1328*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0 1329*207e5cccSFangrui Song // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1330*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1331*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 1332*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 1333*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 1334*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 1335*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 1336*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 1337*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 1338*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 1339*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 1340*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 1341*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, ptr %a) 1342*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], ptr [[__RET]] 1343*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1344*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8 1345*207e5cccSFangrui Song // CHECK: ret 
%struct.int32x2x2_t [[TMP13]] 1346*207e5cccSFangrui Song int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { 1347*207e5cccSFangrui Song return vld2_lane_s32(a, b, 1); 1348*207e5cccSFangrui Song } 1349*207e5cccSFangrui Song 1350*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x1x2_t @test_vld2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 1351*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 1352*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 1353*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 1354*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 1355*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0 1356*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1357*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1358*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 1359*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 1360*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 1361*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 1362*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 1363*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 1364*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 1365*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 1366*207e5cccSFangrui Song 
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 1367*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 1368*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a) 1369*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]] 1370*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1371*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8 1372*207e5cccSFangrui Song // CHECK: ret %struct.int64x1x2_t [[TMP13]] 1373*207e5cccSFangrui Song int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { 1374*207e5cccSFangrui Song return vld2_lane_s64(a, b, 0); 1375*207e5cccSFangrui Song } 1376*207e5cccSFangrui Song 1377*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x4x2_t @test_vld2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 { 1378*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 1379*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 1380*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 1381*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 1382*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0 1383*207e5cccSFangrui Song // CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1384*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1385*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 1386*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0 1387*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 1388*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 1389*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 1390*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 1391*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 1392*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 1393*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half> 1394*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half> 1395*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0(<4 x half> [[TMP8]], <4 x half> [[TMP9]], i64 3, ptr %a) 1396*207e5cccSFangrui Song // CHECK: store { <4 x half>, <4 x half> } [[VLD2_LANE]], ptr [[__RET]] 1397*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1398*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8 1399*207e5cccSFangrui Song // CHECK: ret %struct.float16x4x2_t [[TMP13]] 1400*207e5cccSFangrui Song float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { 1401*207e5cccSFangrui Song return vld2_lane_f16(a, b, 3); 1402*207e5cccSFangrui Song } 1403*207e5cccSFangrui Song 1404*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x2x2_t @test_vld2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 { 1405*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 1406*207e5cccSFangrui Song // CHECK: 
[[B:%.*]] = alloca %struct.float32x2x2_t, align 8 1407*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 1408*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 1409*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0 1410*207e5cccSFangrui Song // CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1411*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1412*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 1413*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0 1414*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 1415*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 1416*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 1417*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 1418*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 1419*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 1420*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 1421*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 1422*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> [[TMP8]], <2 x float> [[TMP9]], i64 1, ptr %a) 1423*207e5cccSFangrui Song // CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], ptr [[__RET]] 1424*207e5cccSFangrui Song // CHECK: call 
void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1425*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8 1426*207e5cccSFangrui Song // CHECK: ret %struct.float32x2x2_t [[TMP13]] 1427*207e5cccSFangrui Song float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { 1428*207e5cccSFangrui Song return vld2_lane_f32(a, b, 1); 1429*207e5cccSFangrui Song } 1430*207e5cccSFangrui Song 1431*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 { 1432*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 1433*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8 1434*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 1435*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 1436*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0 1437*207e5cccSFangrui Song // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1438*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1439*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 1440*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0 1441*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 1442*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 1443*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 1444*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 1445*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 1446*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 1447*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 1448*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 1449*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> [[TMP8]], <1 x double> [[TMP9]], i64 0, ptr %a) 1450*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], ptr [[__RET]] 1451*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1452*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8 1453*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x2_t [[TMP13]] 1454*207e5cccSFangrui Song float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) { 1455*207e5cccSFangrui Song return vld2_lane_f64(a, b, 0); 1456*207e5cccSFangrui Song } 1457*207e5cccSFangrui Song 1458*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x8x2_t @test_vld2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 1459*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 1460*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 1461*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 1462*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 1463*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0 1464*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], 
align 8 1465*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1466*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 1467*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 1468*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 1469*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 1470*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 1471*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 1472*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 1473*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], ptr [[__RET]] 1474*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1475*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8 1476*207e5cccSFangrui Song // CHECK: ret %struct.poly8x8x2_t [[TMP8]] 1477*207e5cccSFangrui Song poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) { 1478*207e5cccSFangrui Song return vld2_lane_p8(a, b, 7); 1479*207e5cccSFangrui Song } 1480*207e5cccSFangrui Song 1481*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x4x2_t @test_vld2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 1482*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 1483*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 1484*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca 
%struct.poly16x4x2_t, align 8 1485*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 1486*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0 1487*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1488*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1489*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 1490*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 1491*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 1492*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 1493*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 1494*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 1495*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 1496*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 1497*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 1498*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 1499*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, ptr %a) 1500*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], ptr [[__RET]] 1501*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1502*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = 
load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8 1503*207e5cccSFangrui Song // CHECK: ret %struct.poly16x4x2_t [[TMP13]] 1504*207e5cccSFangrui Song poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) { 1505*207e5cccSFangrui Song return vld2_lane_p16(a, b, 3); 1506*207e5cccSFangrui Song } 1507*207e5cccSFangrui Song 1508*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 1509*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 1510*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8 1511*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 1512*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 1513*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0 1514*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1515*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 1516*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 1517*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 1518*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 1519*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 1520*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 1521*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 1522*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 
1523*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 1524*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 1525*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 1526*207e5cccSFangrui Song // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, ptr %a) 1527*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], ptr [[__RET]] 1528*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 1529*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8 1530*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x2_t [[TMP13]] 1531*207e5cccSFangrui Song poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) { 1532*207e5cccSFangrui Song return vld2_lane_p64(a, b, 0); 1533*207e5cccSFangrui Song } 1534*207e5cccSFangrui Song 1535*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x8x3_t @test_vld3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 1536*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 1537*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 1538*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 1539*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 1540*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0 1541*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1542*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1543*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] 
= getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 1544*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 1545*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 1546*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 1547*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 1548*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 1549*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 1550*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 1551*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 1552*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 1553*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 1554*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 1555*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 1556*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 1557*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 1558*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a) 1559*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]] 1560*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 
false) 1561*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16 1562*207e5cccSFangrui Song // CHECK: ret %struct.uint16x8x3_t [[TMP16]] 1563*207e5cccSFangrui Song uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) { 1564*207e5cccSFangrui Song return vld3q_lane_u16(a, b, 7); 1565*207e5cccSFangrui Song } 1566*207e5cccSFangrui Song 1567*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x4x3_t @test_vld3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 { 1568*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 1569*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 1570*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 1571*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 1572*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0 1573*207e5cccSFangrui Song // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1574*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1575*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 1576*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 1577*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 1578*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 1579*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 1580*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 1581*207e5cccSFangrui Song // 
CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 1582*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 1583*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 1584*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 1585*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 1586*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 1587*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 1588*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 1589*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 1590*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, ptr %a) 1591*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], ptr [[__RET]] 1592*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1593*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16 1594*207e5cccSFangrui Song // CHECK: ret %struct.uint32x4x3_t [[TMP16]] 1595*207e5cccSFangrui Song uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) { 1596*207e5cccSFangrui Song return vld3q_lane_u32(a, b, 3); 1597*207e5cccSFangrui Song } 1598*207e5cccSFangrui Song 1599*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 1600*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 1601*207e5cccSFangrui Song // 
CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 1602*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 1603*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 1604*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0 1605*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1606*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1607*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 1608*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 1609*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 1610*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 1611*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 1612*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 1613*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 1614*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 1615*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 1616*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 1617*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 1618*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 1619*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] 
to <2 x i64> 1620*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 1621*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 1622*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a) 1623*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]] 1624*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1625*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16 1626*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x3_t [[TMP16]] 1627*207e5cccSFangrui Song uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) { 1628*207e5cccSFangrui Song return vld3q_lane_u64(a, b, 1); 1629*207e5cccSFangrui Song } 1630*207e5cccSFangrui Song 1631*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int16x8x3_t @test_vld3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 1632*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 1633*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 1634*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 1635*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 1636*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0 1637*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1638*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1639*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw 
%struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 1640*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 1641*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 1642*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 1643*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 1644*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 1645*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 1646*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 1647*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 1648*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 1649*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 1650*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 1651*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 1652*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 1653*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 1654*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a) 1655*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]] 1656*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1657*207e5cccSFangrui Song 
// CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16 1658*207e5cccSFangrui Song // CHECK: ret %struct.int16x8x3_t [[TMP16]] 1659*207e5cccSFangrui Song int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) { 1660*207e5cccSFangrui Song return vld3q_lane_s16(a, b, 7); 1661*207e5cccSFangrui Song } 1662*207e5cccSFangrui Song 1663*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x4x3_t @test_vld3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 { 1664*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 1665*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 1666*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 1667*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 1668*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0 1669*207e5cccSFangrui Song // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1670*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1671*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 1672*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 1673*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 1674*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 1675*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 1676*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 1677*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr 
[[ARRAYIDX2]], align 16 1678*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 1679*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 1680*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 1681*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 1682*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 1683*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 1684*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 1685*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 1686*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, ptr %a) 1687*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], ptr [[__RET]] 1688*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1689*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16 1690*207e5cccSFangrui Song // CHECK: ret %struct.int32x4x3_t [[TMP16]] 1691*207e5cccSFangrui Song int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) { 1692*207e5cccSFangrui Song return vld3q_lane_s32(a, b, 3); 1693*207e5cccSFangrui Song } 1694*207e5cccSFangrui Song 1695*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 1696*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 1697*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, 
align 16 1698*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 1699*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 1700*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0 1701*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1702*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1703*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 1704*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 1705*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 1706*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 1707*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 1708*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 1709*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 1710*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 1711*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 1712*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 1713*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 1714*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 1715*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 1716*207e5cccSFangrui Song // CHECK: 
[[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 1717*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 1718*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a) 1719*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]] 1720*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1721*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16 1722*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x3_t [[TMP16]] 1723*207e5cccSFangrui Song int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) { 1724*207e5cccSFangrui Song return vld3q_lane_s64(a, b, 1); 1725*207e5cccSFangrui Song } 1726*207e5cccSFangrui Song 1727*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x8x3_t @test_vld3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 { 1728*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 1729*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 1730*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 1731*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 1732*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0 1733*207e5cccSFangrui Song // CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1734*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1735*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 
0, i32 0 1736*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0 1737*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 1738*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 1739*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 1740*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 1741*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 1742*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 1743*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 1744*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 1745*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 1746*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 1747*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half> 1748*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half> 1749*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half> 1750*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0(<8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i64 7, ptr %a) 1751*207e5cccSFangrui Song // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3_LANE]], ptr [[__RET]] 1752*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1753*207e5cccSFangrui Song // CHECK: 
[[TMP16:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16 1754*207e5cccSFangrui Song // CHECK: ret %struct.float16x8x3_t [[TMP16]] 1755*207e5cccSFangrui Song float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) { 1756*207e5cccSFangrui Song return vld3q_lane_f16(a, b, 7); 1757*207e5cccSFangrui Song } 1758*207e5cccSFangrui Song 1759*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x4x3_t @test_vld3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 { 1760*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 1761*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 1762*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 1763*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 1764*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0 1765*207e5cccSFangrui Song // CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1766*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1767*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 1768*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0 1769*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 1770*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 1771*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 1772*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 1773*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = 
load <4 x float>, ptr [[ARRAYIDX2]], align 16 1774*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 1775*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 1776*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 1777*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 1778*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 1779*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 1780*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 1781*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 1782*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i64 3, ptr %a) 1783*207e5cccSFangrui Song // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], ptr [[__RET]] 1784*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1785*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16 1786*207e5cccSFangrui Song // CHECK: ret %struct.float32x4x3_t [[TMP16]] 1787*207e5cccSFangrui Song float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) { 1788*207e5cccSFangrui Song return vld3q_lane_f32(a, b, 3); 1789*207e5cccSFangrui Song } 1790*207e5cccSFangrui Song 1791*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 { 1792*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 
1793*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 1794*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 1795*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 1796*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 1797*207e5cccSFangrui Song // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1798*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1799*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 1800*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 1801*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 1802*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 1803*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 1804*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 1805*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 1806*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 1807*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 1808*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 1809*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 1810*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8> 
1811*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 1812*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 1813*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double> 1814*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]], i64 1, ptr %a) 1815*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], ptr [[__RET]] 1816*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1817*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16 1818*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x3_t [[TMP16]] 1819*207e5cccSFangrui Song float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) { 1820*207e5cccSFangrui Song return vld3q_lane_f64(a, b, 1); 1821*207e5cccSFangrui Song } 1822*207e5cccSFangrui Song 1823*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x16x3_t @test_vld3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 { 1824*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 1825*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 1826*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 1827*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 1828*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0 1829*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1830*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 
[[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1831*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 1832*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 1833*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 1834*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 1835*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 1836*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 1837*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 1838*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 1839*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 1840*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a) 1841*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], ptr [[__RET]] 1842*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1843*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16 1844*207e5cccSFangrui Song // CHECK: ret %struct.poly8x16x3_t [[TMP9]] 1845*207e5cccSFangrui Song poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) { 1846*207e5cccSFangrui Song return vld3q_lane_p8(a, b, 15); 1847*207e5cccSFangrui Song } 1848*207e5cccSFangrui Song 1849*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x8x3_t 
@test_vld3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 1850*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 1851*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 1852*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 1853*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 1854*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0 1855*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 1856*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1857*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 1858*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 1859*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 1860*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 1861*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 1862*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 1863*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 1864*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 1865*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 1866*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 1867*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x 
i16>, ptr [[ARRAYIDX4]], align 16 1868*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 1869*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 1870*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 1871*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 1872*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, ptr %a) 1873*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], ptr [[__RET]] 1874*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1875*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16 1876*207e5cccSFangrui Song // CHECK: ret %struct.poly16x8x3_t [[TMP16]] 1877*207e5cccSFangrui Song poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) { 1878*207e5cccSFangrui Song return vld3q_lane_p16(a, b, 7); 1879*207e5cccSFangrui Song } 1880*207e5cccSFangrui Song 1881*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 1882*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 1883*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 1884*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 1885*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 1886*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0 1887*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 
16 1888*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 1889*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 1890*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 1891*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 1892*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 1893*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 1894*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 1895*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 1896*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 1897*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 1898*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 1899*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 1900*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 1901*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 1902*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 1903*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 1904*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, ptr %a) 1905*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 
x i64>, <2 x i64> } [[VLD3_LANE]], ptr [[__RET]] 1906*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 1907*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16 1908*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x3_t [[TMP16]] 1909*207e5cccSFangrui Song poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) { 1910*207e5cccSFangrui Song return vld3q_lane_p64(a, b, 1); 1911*207e5cccSFangrui Song } 1912*207e5cccSFangrui Song 1913*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x8x3_t @test_vld3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 1914*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 1915*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 1916*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 1917*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 1918*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0 1919*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1920*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 1921*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 1922*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 1923*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 1924*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 1925*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x 
i8>], ptr [[VAL1]], i64 0, i64 1 1926*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 1927*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 1928*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 1929*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 1930*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a) 1931*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]] 1932*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 1933*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8 1934*207e5cccSFangrui Song // CHECK: ret %struct.uint8x8x3_t [[TMP9]] 1935*207e5cccSFangrui Song uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) { 1936*207e5cccSFangrui Song return vld3_lane_u8(a, b, 7); 1937*207e5cccSFangrui Song } 1938*207e5cccSFangrui Song 1939*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x4x3_t @test_vld3_lane_u16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 1940*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 1941*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 1942*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 1943*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 1944*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0 1945*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] 
[[B]].coerce, ptr [[COERCE_DIVE]], align 8 1946*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 1947*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 1948*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 1949*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 1950*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 1951*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 1952*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 1953*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 1954*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 1955*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 1956*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 1957*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 1958*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 1959*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 1960*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 1961*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 1962*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a) 1963*207e5cccSFangrui Song // 
CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]] 1964*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 1965*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8 1966*207e5cccSFangrui Song // CHECK: ret %struct.uint16x4x3_t [[TMP16]] 1967*207e5cccSFangrui Song uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) { 1968*207e5cccSFangrui Song return vld3_lane_u16(a, b, 3); 1969*207e5cccSFangrui Song } 1970*207e5cccSFangrui Song 1971*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x2x3_t @test_vld3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 { 1972*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 1973*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 1974*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 1975*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 1976*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0 1977*207e5cccSFangrui Song // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 1978*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 1979*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 1980*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 1981*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 1982*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 1983*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw 
%struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 1984*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 1985*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 1986*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 1987*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 1988*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 1989*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 1990*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 1991*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 1992*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 1993*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 1994*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, ptr %a) 1995*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], ptr [[__RET]] 1996*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 1997*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8 1998*207e5cccSFangrui Song // CHECK: ret %struct.uint32x2x3_t [[TMP16]] 1999*207e5cccSFangrui Song uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) { 2000*207e5cccSFangrui Song return vld3_lane_u32(a, b, 1); 2001*207e5cccSFangrui Song } 2002*207e5cccSFangrui Song 2003*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x1x3_t 
@test_vld3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 2004*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 2005*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 2006*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 2007*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 2008*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0 2009*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2010*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2011*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 2012*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 2013*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 2014*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2015*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 2016*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 2017*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 2018*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2019*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 2020*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 2021*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr 
[[ARRAYIDX4]], align 8 2022*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 2023*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2024*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2025*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 2026*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a) 2027*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]] 2028*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2029*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8 2030*207e5cccSFangrui Song // CHECK: ret %struct.uint64x1x3_t [[TMP16]] 2031*207e5cccSFangrui Song uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) { 2032*207e5cccSFangrui Song return vld3_lane_u64(a, b, 0); 2033*207e5cccSFangrui Song } 2034*207e5cccSFangrui Song 2035*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x8x3_t @test_vld3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 2036*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8 2037*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 2038*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 2039*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 2040*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0 2041*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2042*207e5cccSFangrui Song // CHECK: 
call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2043*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 2044*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 2045*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 2046*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 2047*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 2048*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 2049*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 2050*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 2051*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 2052*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a) 2053*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]] 2054*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2055*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8 2056*207e5cccSFangrui Song // CHECK: ret %struct.int8x8x3_t [[TMP9]] 2057*207e5cccSFangrui Song int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) { 2058*207e5cccSFangrui Song return vld3_lane_s8(a, b, 7); 2059*207e5cccSFangrui Song } 2060*207e5cccSFangrui Song 2061*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} 
%struct.int16x4x3_t @test_vld3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 2062*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 2063*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 2064*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 2065*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 2066*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0 2067*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2068*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2069*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 2070*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 2071*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 2072*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 2073*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 2074*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 2075*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 2076*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 2077*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 2078*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 2079*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x 
i16>, ptr [[ARRAYIDX4]], align 8 2080*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 2081*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2082*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2083*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 2084*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a) 2085*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]] 2086*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2087*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8 2088*207e5cccSFangrui Song // CHECK: ret %struct.int16x4x3_t [[TMP16]] 2089*207e5cccSFangrui Song int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) { 2090*207e5cccSFangrui Song return vld3_lane_s16(a, b, 3); 2091*207e5cccSFangrui Song } 2092*207e5cccSFangrui Song 2093*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x2x3_t @test_vld3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 { 2094*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 2095*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 2096*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 2097*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 2098*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0 2099*207e5cccSFangrui Song // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2100*207e5cccSFangrui Song 
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2101*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 2102*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 2103*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 2104*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 2105*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 2106*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 2107*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 2108*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 2109*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 2110*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 2111*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 2112*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 2113*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 2114*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 2115*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 2116*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, ptr %a) 2117*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], ptr 
[[__RET]] 2118*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2119*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8 2120*207e5cccSFangrui Song // CHECK: ret %struct.int32x2x3_t [[TMP16]] 2121*207e5cccSFangrui Song int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) { 2122*207e5cccSFangrui Song return vld3_lane_s32(a, b, 1); 2123*207e5cccSFangrui Song } 2124*207e5cccSFangrui Song 2125*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x1x3_t @test_vld3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 2126*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 2127*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 2128*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 2129*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 2130*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0 2131*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2132*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2133*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 2134*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 2135*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 2136*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2137*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 2138*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 2139*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 2140*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2141*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 2142*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 2143*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 2144*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 2145*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2146*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2147*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 2148*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a) 2149*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]] 2150*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2151*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8 2152*207e5cccSFangrui Song // CHECK: ret %struct.int64x1x3_t [[TMP16]] 2153*207e5cccSFangrui Song int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) { 2154*207e5cccSFangrui Song return vld3_lane_s64(a, b, 0); 2155*207e5cccSFangrui Song } 2156*207e5cccSFangrui Song 2157*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x4x3_t @test_vld3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 { 2158*207e5cccSFangrui 
Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 2159*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 2160*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 2161*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 2162*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0 2163*207e5cccSFangrui Song // CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2164*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2165*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 2166*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0 2167*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 2168*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 2169*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 2170*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 2171*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 2172*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 2173*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 2174*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 2175*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 2176*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x 
half> [[TMP8]] to <8 x i8> 2177*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half> 2178*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half> 2179*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half> 2180*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0(<4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i64 3, ptr %a) 2181*207e5cccSFangrui Song // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3_LANE]], ptr [[__RET]] 2182*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2183*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8 2184*207e5cccSFangrui Song // CHECK: ret %struct.float16x4x3_t [[TMP16]] 2185*207e5cccSFangrui Song float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) { 2186*207e5cccSFangrui Song return vld3_lane_f16(a, b, 3); 2187*207e5cccSFangrui Song } 2188*207e5cccSFangrui Song 2189*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x2x3_t @test_vld3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 { 2190*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 2191*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 2192*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 2193*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 2194*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0 2195*207e5cccSFangrui Song // CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2196*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 
[[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2197*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 2198*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0 2199*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 2200*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 2201*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 2202*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 2203*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 2204*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 2205*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 2206*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 2207*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 2208*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 2209*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 2210*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 2211*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 2212*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i64 1, ptr %a) 2213*207e5cccSFangrui Song // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], ptr [[__RET]] 
2214*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2215*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8 2216*207e5cccSFangrui Song // CHECK: ret %struct.float32x2x3_t [[TMP16]] 2217*207e5cccSFangrui Song float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) { 2218*207e5cccSFangrui Song return vld3_lane_f32(a, b, 1); 2219*207e5cccSFangrui Song } 2220*207e5cccSFangrui Song 2221*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 { 2222*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 2223*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 2224*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 2225*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 2226*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0 2227*207e5cccSFangrui Song // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2228*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2229*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 2230*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 2231*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 2232*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 2233*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 
2234*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 2235*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 2236*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 2237*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 2238*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 2239*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 2240*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8> 2241*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 2242*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 2243*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double> 2244*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> [[TMP10]], <1 x double> [[TMP11]], <1 x double> [[TMP12]], i64 0, ptr %a) 2245*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], ptr [[__RET]] 2246*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2247*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8 2248*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x3_t [[TMP16]] 2249*207e5cccSFangrui Song float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) { 2250*207e5cccSFangrui Song return vld3_lane_f64(a, b, 0); 2251*207e5cccSFangrui Song } 2252*207e5cccSFangrui Song 2253*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x8x3_t 
@test_vld3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 2254*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 2255*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 2256*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 2257*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 2258*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0 2259*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2260*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2261*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 2262*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 2263*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 2264*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 2265*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 2266*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 2267*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 2268*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 2269*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 2270*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> 
[[TMP5]], i64 7, ptr %a) 2271*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], ptr [[__RET]] 2272*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2273*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8 2274*207e5cccSFangrui Song // CHECK: ret %struct.poly8x8x3_t [[TMP9]] 2275*207e5cccSFangrui Song poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) { 2276*207e5cccSFangrui Song return vld3_lane_p8(a, b, 7); 2277*207e5cccSFangrui Song } 2278*207e5cccSFangrui Song 2279*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x4x3_t @test_vld3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 2280*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 2281*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 2282*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 2283*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 2284*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0 2285*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2286*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2287*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 2288*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 2289*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 2290*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 2291*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = 
getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 2292*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 2293*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 2294*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 2295*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 2296*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 2297*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 2298*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 2299*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2300*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2301*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 2302*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, ptr %a) 2303*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], ptr [[__RET]] 2304*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2305*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8 2306*207e5cccSFangrui Song // CHECK: ret %struct.poly16x4x3_t [[TMP16]] 2307*207e5cccSFangrui Song poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) { 2308*207e5cccSFangrui Song return vld3_lane_p16(a, b, 3); 2309*207e5cccSFangrui Song } 2310*207e5cccSFangrui Song 2311*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} 
%struct.poly64x1x3_t @test_vld3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 2312*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 2313*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 2314*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 2315*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 2316*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0 2317*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2318*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 2319*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 2320*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 2321*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 2322*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2323*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 2324*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 2325*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 2326*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2327*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 2328*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 2329*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = 
load <1 x i64>, ptr [[ARRAYIDX4]], align 8 2330*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 2331*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2332*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2333*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 2334*207e5cccSFangrui Song // CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, ptr %a) 2335*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], ptr [[__RET]] 2336*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 2337*207e5cccSFangrui Song // CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8 2338*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x3_t [[TMP16]] 2339*207e5cccSFangrui Song poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { 2340*207e5cccSFangrui Song return vld3_lane_p64(a, b, 0); 2341*207e5cccSFangrui Song } 2342*207e5cccSFangrui Song 2343*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x16x4_t @test_vld4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 2344*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 2345*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 2346*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 2347*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 2348*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0 2349*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 
2350*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2351*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 2352*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 2353*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 2354*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 2355*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 2356*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 2357*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 2358*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 2359*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 2360*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 2361*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 2362*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 2363*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a) 2364*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]] 2365*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], 
i64 64, i1 false) 2366*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16 2367*207e5cccSFangrui Song // CHECK: ret %struct.uint8x16x4_t [[TMP10]] 2368*207e5cccSFangrui Song uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { 2369*207e5cccSFangrui Song return vld4q_lane_u8(a, b, 15); 2370*207e5cccSFangrui Song } 2371*207e5cccSFangrui Song 2372*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x8x4_t @test_vld4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 2373*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 2374*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 2375*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 2376*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 2377*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0 2378*207e5cccSFangrui Song // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2379*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2380*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 2381*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 2382*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 2383*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2384*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 2385*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 2386*207e5cccSFangrui 
Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 2387*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2388*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 2389*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 2390*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 2391*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 2392*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 2393*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 2394*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 2395*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 2396*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2397*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2398*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 2399*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 2400*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a) 2401*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]] 2402*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2403*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load 
%struct.uint16x8x4_t, ptr [[RETVAL]], align 16 2404*207e5cccSFangrui Song // CHECK: ret %struct.uint16x8x4_t [[TMP19]] 2405*207e5cccSFangrui Song uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { 2406*207e5cccSFangrui Song return vld4q_lane_u16(a, b, 7); 2407*207e5cccSFangrui Song } 2408*207e5cccSFangrui Song 2409*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x4x4_t @test_vld4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 { 2410*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 2411*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 2412*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 2413*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 2414*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0 2415*207e5cccSFangrui Song // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2416*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2417*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 2418*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 2419*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 2420*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 2421*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 2422*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 2423*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 
2424*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 2425*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 2426*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 2427*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 2428*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 2429*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 2430*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 2431*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 2432*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 2433*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 2434*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> 2435*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 2436*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 2437*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, ptr %a) 2438*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], ptr [[__RET]] 2439*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2440*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16 2441*207e5cccSFangrui Song // CHECK: ret 
%struct.uint32x4x4_t [[TMP19]] 2442*207e5cccSFangrui Song uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { 2443*207e5cccSFangrui Song return vld4q_lane_u32(a, b, 3); 2444*207e5cccSFangrui Song } 2445*207e5cccSFangrui Song 2446*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 2447*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 2448*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 2449*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 2450*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 2451*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0 2452*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2453*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2454*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 2455*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 2456*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 2457*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2458*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 2459*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 2460*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 2461*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 
2462*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 2463*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 2464*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 2465*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 2466*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 2467*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 2468*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 2469*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 2470*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2471*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2472*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 2473*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 2474*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a) 2475*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]] 2476*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2477*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16 2478*207e5cccSFangrui Song // CHECK: ret %struct.uint64x2x4_t [[TMP19]] 2479*207e5cccSFangrui Song uint64x2x4_t 
test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { 2480*207e5cccSFangrui Song return vld4q_lane_u64(a, b, 1); 2481*207e5cccSFangrui Song } 2482*207e5cccSFangrui Song 2483*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x16x4_t @test_vld4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 2484*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 2485*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 2486*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 2487*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 2488*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0 2489*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2490*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2491*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 2492*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 2493*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 2494*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 2495*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 2496*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 2497*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 2498*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 
2499*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 2500*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 2501*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 2502*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 2503*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a) 2504*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]] 2505*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2506*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16 2507*207e5cccSFangrui Song // CHECK: ret %struct.int8x16x4_t [[TMP10]] 2508*207e5cccSFangrui Song int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { 2509*207e5cccSFangrui Song return vld4q_lane_s8(a, b, 15); 2510*207e5cccSFangrui Song } 2511*207e5cccSFangrui Song 2512*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int16x8x4_t @test_vld4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 2513*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 2514*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 2515*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 2516*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 2517*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0 2518*207e5cccSFangrui Song // CHECK: 
store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2519*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2520*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 2521*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 2522*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 2523*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2524*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 2525*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 2526*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 2527*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2528*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 2529*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 2530*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 2531*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 2532*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 2533*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 2534*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 2535*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 2536*207e5cccSFangrui Song // CHECK: 
[[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2537*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2538*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 2539*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 2540*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a) 2541*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]] 2542*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2543*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16 2544*207e5cccSFangrui Song // CHECK: ret %struct.int16x8x4_t [[TMP19]] 2545*207e5cccSFangrui Song int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { 2546*207e5cccSFangrui Song return vld4q_lane_s16(a, b, 7); 2547*207e5cccSFangrui Song } 2548*207e5cccSFangrui Song 2549*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x4x4_t @test_vld4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 { 2550*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16 2551*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 2552*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 2553*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 2554*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0 2555*207e5cccSFangrui Song // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2556*207e5cccSFangrui Song // 
CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2557*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 2558*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 2559*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 2560*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> 2561*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 2562*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 2563*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 2564*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> 2565*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 2566*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 2567*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 2568*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> 2569*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 2570*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 2571*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 2572*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> 2573*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> 2574*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] 
= bitcast <16 x i8> [[TMP7]] to <4 x i32> 2575*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> 2576*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> 2577*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, ptr %a) 2578*207e5cccSFangrui Song // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], ptr [[__RET]] 2579*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2580*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16 2581*207e5cccSFangrui Song // CHECK: ret %struct.int32x4x4_t [[TMP19]] 2582*207e5cccSFangrui Song int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { 2583*207e5cccSFangrui Song return vld4q_lane_s32(a, b, 3); 2584*207e5cccSFangrui Song } 2585*207e5cccSFangrui Song 2586*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 2587*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16 2588*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 2589*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16 2590*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 2591*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0 2592*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2593*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 
2594*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 2595*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 2596*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 2597*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2598*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 2599*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 2600*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 2601*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 2602*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 2603*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 2604*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 2605*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 2606*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 2607*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 2608*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 2609*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 2610*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2611*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2612*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = 
bitcast <16 x i8> [[TMP9]] to <2 x i64> 2613*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 2614*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a) 2615*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]] 2616*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2617*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16 2618*207e5cccSFangrui Song // CHECK: ret %struct.int64x2x4_t [[TMP19]] 2619*207e5cccSFangrui Song int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { 2620*207e5cccSFangrui Song return vld4q_lane_s64(a, b, 1); 2621*207e5cccSFangrui Song } 2622*207e5cccSFangrui Song 2623*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x8x4_t @test_vld4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 { 2624*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16 2625*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 2626*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 2627*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 2628*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0 2629*207e5cccSFangrui Song // CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2630*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2631*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw 
%struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 2632*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0 2633*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 2634*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> 2635*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 2636*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 2637*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 2638*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> 2639*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 2640*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 2641*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 2642*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> 2643*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 2644*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3 2645*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16 2646*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> 2647*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half> 2648*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half> 2649*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half> 
2650*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half> 2651*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0(<8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i64 7, ptr %a) 2652*207e5cccSFangrui Song // CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4_LANE]], ptr [[__RET]] 2653*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2654*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16 2655*207e5cccSFangrui Song // CHECK: ret %struct.float16x8x4_t [[TMP19]] 2656*207e5cccSFangrui Song float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { 2657*207e5cccSFangrui Song return vld4q_lane_f16(a, b, 7); 2658*207e5cccSFangrui Song } 2659*207e5cccSFangrui Song 2660*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x4x4_t @test_vld4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 { 2661*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16 2662*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 2663*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 2664*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 2665*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0 2666*207e5cccSFangrui Song // CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2667*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2668*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr 
[[__S1]], i32 0, i32 0 2669*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0 2670*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 2671*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> 2672*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 2673*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 2674*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 2675*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> 2676*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 2677*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 2678*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 2679*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> 2680*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 2681*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3 2682*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16 2683*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> 2684*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> 2685*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> 2686*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> 2687*207e5cccSFangrui Song // 
CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float> 2688*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i64 3, ptr %a) 2689*207e5cccSFangrui Song // CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], ptr [[__RET]] 2690*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2691*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16 2692*207e5cccSFangrui Song // CHECK: ret %struct.float32x4x4_t [[TMP19]] 2693*207e5cccSFangrui Song float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { 2694*207e5cccSFangrui Song return vld4q_lane_f32(a, b, 3); 2695*207e5cccSFangrui Song } 2696*207e5cccSFangrui Song 2697*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 { 2698*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16 2699*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 2700*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16 2701*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 2702*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0 2703*207e5cccSFangrui Song // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2704*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2705*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, 
i32 0 2706*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0 2707*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 2708*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> 2709*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 2710*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 2711*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 2712*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> 2713*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 2714*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 2715*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 2716*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8> 2717*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 2718*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3 2719*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16 2720*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x double> [[TMP10]] to <16 x i8> 2721*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> 2722*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> 2723*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double> 2724*207e5cccSFangrui Song // 
CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x double> 2725*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP15]], i64 1, ptr %a) 2726*207e5cccSFangrui Song // CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], ptr [[__RET]] 2727*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2728*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16 2729*207e5cccSFangrui Song // CHECK: ret %struct.float64x2x4_t [[TMP19]] 2730*207e5cccSFangrui Song float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { 2731*207e5cccSFangrui Song return vld4q_lane_f64(a, b, 1); 2732*207e5cccSFangrui Song } 2733*207e5cccSFangrui Song 2734*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x16x4_t @test_vld4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 2735*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16 2736*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 2737*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 2738*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 2739*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0 2740*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2741*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2742*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, 
i32 0 2743*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 2744*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 2745*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 2746*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 2747*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 2748*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 2749*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 2750*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 2751*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 2752*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 2753*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 2754*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, ptr %a) 2755*207e5cccSFangrui Song // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], ptr [[__RET]] 2756*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2757*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16 2758*207e5cccSFangrui Song // CHECK: ret %struct.poly8x16x4_t [[TMP10]] 2759*207e5cccSFangrui Song poly8x16x4_t 
test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { 2760*207e5cccSFangrui Song return vld4q_lane_p8(a, b, 15); 2761*207e5cccSFangrui Song } 2762*207e5cccSFangrui Song 2763*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x8x4_t @test_vld4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 2764*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16 2765*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 2766*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 2767*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 2768*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0 2769*207e5cccSFangrui Song // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2770*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2771*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 2772*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 2773*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 2774*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> 2775*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 2776*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 2777*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 2778*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> 2779*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr 
inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 2780*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 2781*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 2782*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> 2783*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 2784*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 2785*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 2786*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> 2787*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> 2788*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> 2789*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> 2790*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> 2791*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, ptr %a) 2792*207e5cccSFangrui Song // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], ptr [[__RET]] 2793*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2794*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16 2795*207e5cccSFangrui Song // CHECK: ret %struct.poly16x8x4_t [[TMP19]] 2796*207e5cccSFangrui Song poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) { 2797*207e5cccSFangrui Song return 
vld4q_lane_p16(a, b, 7); 2798*207e5cccSFangrui Song } 2799*207e5cccSFangrui Song 2800*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 2801*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 2802*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 2803*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 2804*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 2805*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0 2806*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 2807*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 2808*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 2809*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 2810*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 2811*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> 2812*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 2813*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 2814*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 2815*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> 2816*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 2817*207e5cccSFangrui Song 
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 2818*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 2819*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> 2820*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 2821*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 2822*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 2823*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> 2824*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> 2825*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> 2826*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 2827*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> 2828*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, ptr %a) 2829*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], ptr [[__RET]] 2830*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 2831*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16 2832*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x4_t [[TMP19]] 2833*207e5cccSFangrui Song poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) { 2834*207e5cccSFangrui Song return vld4q_lane_p64(a, b, 1); 2835*207e5cccSFangrui Song } 2836*207e5cccSFangrui Song 
2837*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint8x8x4_t @test_vld4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 { 2838*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8 2839*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 2840*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 2841*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 2842*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0 2843*207e5cccSFangrui Song // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2844*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 2845*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 2846*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 2847*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 2848*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 2849*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 2850*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 2851*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 2852*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 2853*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 2854*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, 
ptr [[__S1]], i32 0, i32 0 2855*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 2856*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 2857*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a) 2858*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]] 2859*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 2860*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8 2861*207e5cccSFangrui Song // CHECK: ret %struct.uint8x8x4_t [[TMP10]] 2862*207e5cccSFangrui Song uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) { 2863*207e5cccSFangrui Song return vld4_lane_u8(a, b, 7); 2864*207e5cccSFangrui Song } 2865*207e5cccSFangrui Song 2866*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint16x4x4_t @test_vld4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 { 2867*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8 2868*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 2869*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 2870*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 2871*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0 2872*207e5cccSFangrui Song // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2873*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 2874*207e5cccSFangrui 
Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 2875*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 2876*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 2877*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 2878*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 2879*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 2880*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 2881*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 2882*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 2883*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 2884*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 2885*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 2886*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 2887*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 2888*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 2889*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 2890*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 2891*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 2892*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x 
i16> 2893*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 2894*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a) 2895*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]] 2896*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 2897*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8 2898*207e5cccSFangrui Song // CHECK: ret %struct.uint16x4x4_t [[TMP19]] 2899*207e5cccSFangrui Song uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) { 2900*207e5cccSFangrui Song return vld4_lane_u16(a, b, 3); 2901*207e5cccSFangrui Song } 2902*207e5cccSFangrui Song 2903*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint32x2x4_t @test_vld4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 { 2904*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8 2905*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 2906*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 2907*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 2908*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0 2909*207e5cccSFangrui Song // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2910*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 2911*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 
2912*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 2913*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 2914*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 2915*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 2916*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 2917*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 2918*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 2919*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 2920*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 2921*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 2922*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 2923*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 2924*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 2925*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 2926*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 2927*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 2928*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 2929*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 2930*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 
2931*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, ptr %a) 2932*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], ptr [[__RET]] 2933*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 2934*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8 2935*207e5cccSFangrui Song // CHECK: ret %struct.uint32x2x4_t [[TMP19]] 2936*207e5cccSFangrui Song uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) { 2937*207e5cccSFangrui Song return vld4_lane_u32(a, b, 1); 2938*207e5cccSFangrui Song } 2939*207e5cccSFangrui Song 2940*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.uint64x1x4_t @test_vld4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 { 2941*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8 2942*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 2943*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 2944*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 2945*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0 2946*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2947*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 2948*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 2949*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, 
i64 0 2950*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 2951*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 2952*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 2953*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 2954*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 2955*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 2956*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 2957*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 2958*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 2959*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 2960*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 2961*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 2962*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 2963*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 2964*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 2965*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 2966*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 2967*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 2968*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } 
@llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a) 2969*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]] 2970*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 2971*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8 2972*207e5cccSFangrui Song // CHECK: ret %struct.uint64x1x4_t [[TMP19]] 2973*207e5cccSFangrui Song uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) { 2974*207e5cccSFangrui Song return vld4_lane_u64(a, b, 0); 2975*207e5cccSFangrui Song } 2976*207e5cccSFangrui Song 2977*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int8x8x4_t @test_vld4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 { 2978*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8 2979*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 2980*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 2981*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 2982*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0 2983*207e5cccSFangrui Song // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 2984*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 2985*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 2986*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 2987*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 2988*207e5cccSFangrui Song 
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 2989*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 2990*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 2991*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 2992*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 2993*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 2994*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 2995*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 2996*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 2997*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a) 2998*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]] 2999*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3000*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8 3001*207e5cccSFangrui Song // CHECK: ret %struct.int8x8x4_t [[TMP10]] 3002*207e5cccSFangrui Song int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) { 3003*207e5cccSFangrui Song return vld4_lane_s8(a, b, 7); 3004*207e5cccSFangrui Song } 3005*207e5cccSFangrui Song 3006*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int16x4x4_t @test_vld4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) 
%b.coerce) #0 { 3007*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8 3008*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 3009*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 3010*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 3011*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0 3012*207e5cccSFangrui Song // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3013*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3014*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 3015*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 3016*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 3017*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 3018*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 3019*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 3020*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 3021*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 3022*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 3023*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 3024*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 3025*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = 
bitcast <4 x i16> [[TMP8]] to <8 x i8> 3026*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 3027*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 3028*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 3029*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 3030*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3031*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3032*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 3033*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 3034*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a) 3035*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]] 3036*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3037*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8 3038*207e5cccSFangrui Song // CHECK: ret %struct.int16x4x4_t [[TMP19]] 3039*207e5cccSFangrui Song int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) { 3040*207e5cccSFangrui Song return vld4_lane_s16(a, b, 3); 3041*207e5cccSFangrui Song } 3042*207e5cccSFangrui Song 3043*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int32x2x4_t @test_vld4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 { 3044*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8 3045*207e5cccSFangrui Song // 
CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 3046*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 3047*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 3048*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0 3049*207e5cccSFangrui Song // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3050*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3051*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 3052*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 3053*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 3054*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> 3055*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 3056*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 3057*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 3058*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> 3059*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 3060*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 3061*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 3062*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8> 3063*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw 
%struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 3064*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 3065*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 3066*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8> 3067*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> 3068*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> 3069*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32> 3070*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32> 3071*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, ptr %a) 3072*207e5cccSFangrui Song // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], ptr [[__RET]] 3073*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3074*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8 3075*207e5cccSFangrui Song // CHECK: ret %struct.int32x2x4_t [[TMP19]] 3076*207e5cccSFangrui Song int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) { 3077*207e5cccSFangrui Song return vld4_lane_s32(a, b, 1); 3078*207e5cccSFangrui Song } 3079*207e5cccSFangrui Song 3080*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.int64x1x4_t @test_vld4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 { 3081*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8 3082*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 3083*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca 
%struct.int64x1x4_t, align 8 3084*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 3085*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0 3086*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3087*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3088*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 3089*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 3090*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 3091*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 3092*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 3093*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 3094*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 3095*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 3096*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 3097*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 3098*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 3099*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 3100*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 3101*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x 
<1 x i64>], ptr [[VAL5]], i64 0, i64 3 3102*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 3103*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 3104*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 3105*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 3106*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 3107*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 3108*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a) 3109*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]] 3110*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3111*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8 3112*207e5cccSFangrui Song // CHECK: ret %struct.int64x1x4_t [[TMP19]] 3113*207e5cccSFangrui Song int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) { 3114*207e5cccSFangrui Song return vld4_lane_s64(a, b, 0); 3115*207e5cccSFangrui Song } 3116*207e5cccSFangrui Song 3117*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float16x4x4_t @test_vld4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 { 3118*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8 3119*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 3120*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 3121*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 
3122*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0 3123*207e5cccSFangrui Song // CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3124*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3125*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 3126*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0 3127*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 3128*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> 3129*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 3130*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 3131*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 3132*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> 3133*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 3134*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 3135*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 3136*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8> 3137*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0 3138*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3 3139*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load 
<4 x half>, ptr [[ARRAYIDX6]], align 8 3140*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8> 3141*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half> 3142*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half> 3143*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half> 3144*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half> 3145*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0(<4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i64 3, ptr %a) 3146*207e5cccSFangrui Song // CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4_LANE]], ptr [[__RET]] 3147*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3148*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8 3149*207e5cccSFangrui Song // CHECK: ret %struct.float16x4x4_t [[TMP19]] 3150*207e5cccSFangrui Song float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) { 3151*207e5cccSFangrui Song return vld4_lane_f16(a, b, 3); 3152*207e5cccSFangrui Song } 3153*207e5cccSFangrui Song 3154*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float32x2x4_t @test_vld4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 { 3155*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8 3156*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 3157*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 3158*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 3159*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0 3160*207e5cccSFangrui Song // CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3161*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3162*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 3163*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0 3164*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 3165*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> 3166*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 3167*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 3168*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 3169*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> 3170*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 3171*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 3172*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 3173*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8> 3174*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0 3175*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3 3176*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8 3177*207e5cccSFangrui 
Song // CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8> 3178*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> 3179*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> 3180*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float> 3181*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float> 3182*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i64 1, ptr %a) 3183*207e5cccSFangrui Song // CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], ptr [[__RET]] 3184*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3185*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8 3186*207e5cccSFangrui Song // CHECK: ret %struct.float32x2x4_t [[TMP19]] 3187*207e5cccSFangrui Song float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) { 3188*207e5cccSFangrui Song return vld4_lane_f32(a, b, 1); 3189*207e5cccSFangrui Song } 3190*207e5cccSFangrui Song 3191*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 { 3192*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8 3193*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 3194*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8 3195*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 3196*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], 
i32 0, i32 0 3197*207e5cccSFangrui Song // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3198*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3199*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 3200*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0 3201*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 3202*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8> 3203*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 3204*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 3205*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 3206*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8> 3207*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 3208*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 3209*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 3210*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8> 3211*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 3212*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3 3213*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8 3214*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = 
bitcast <1 x double> [[TMP10]] to <8 x i8> 3215*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double> 3216*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double> 3217*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double> 3218*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x double> 3219*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], <1 x double> [[TMP15]], i64 0, ptr %a) 3220*207e5cccSFangrui Song // CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], ptr [[__RET]] 3221*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3222*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8 3223*207e5cccSFangrui Song // CHECK: ret %struct.float64x1x4_t [[TMP19]] 3224*207e5cccSFangrui Song float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) { 3225*207e5cccSFangrui Song return vld4_lane_f64(a, b, 0); 3226*207e5cccSFangrui Song } 3227*207e5cccSFangrui Song 3228*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly8x8x4_t @test_vld4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 { 3229*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8 3230*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 3231*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 3232*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 3233*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0 
3234*207e5cccSFangrui Song // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3235*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3236*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 3237*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 3238*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 3239*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 3240*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 3241*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 3242*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 3243*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 3244*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 3245*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0 3246*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 3247*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 3248*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, ptr %a) 3249*207e5cccSFangrui Song // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], ptr [[__RET]] 3250*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3251*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8 3252*207e5cccSFangrui Song // CHECK: ret %struct.poly8x8x4_t [[TMP10]] 3253*207e5cccSFangrui Song poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) { 3254*207e5cccSFangrui Song return vld4_lane_p8(a, b, 7); 3255*207e5cccSFangrui Song } 3256*207e5cccSFangrui Song 3257*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly16x4x4_t @test_vld4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 { 3258*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8 3259*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 3260*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 3261*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 3262*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0 3263*207e5cccSFangrui Song // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3264*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3265*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 3266*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 3267*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 3268*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> 3269*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 3270*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], 
ptr [[VAL1]], i64 0, i64 1 3271*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 3272*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> 3273*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 3274*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 3275*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 3276*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8> 3277*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0 3278*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 3279*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 3280*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8> 3281*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> 3282*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> 3283*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16> 3284*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16> 3285*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, ptr %a) 3286*207e5cccSFangrui Song // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], ptr [[__RET]] 3287*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3288*207e5cccSFangrui Song // CHECK: 
[[TMP19:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8 3289*207e5cccSFangrui Song // CHECK: ret %struct.poly16x4x4_t [[TMP19]] 3290*207e5cccSFangrui Song poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) { 3291*207e5cccSFangrui Song return vld4_lane_p16(a, b, 3); 3292*207e5cccSFangrui Song } 3293*207e5cccSFangrui Song 3294*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 { 3295*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 3296*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 3297*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 3298*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 3299*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0 3300*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3301*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 3302*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 3303*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 3304*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 3305*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> 3306*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 3307*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 3308*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 
8 3309*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> 3310*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 3311*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 3312*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 3313*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> 3314*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 3315*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 3316*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 3317*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> 3318*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> 3319*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> 3320*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 3321*207e5cccSFangrui Song // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> 3322*207e5cccSFangrui Song // CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, ptr %a) 3323*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], ptr [[__RET]] 3324*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 3325*207e5cccSFangrui Song // CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8 3326*207e5cccSFangrui Song // CHECK: ret 
%struct.poly64x1x4_t [[TMP19]] 3327*207e5cccSFangrui Song poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) { 3328*207e5cccSFangrui Song return vld4_lane_p64(a, b, 0); 3329*207e5cccSFangrui Song } 3330*207e5cccSFangrui Song 3331*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u8(ptr noundef %a, <16 x i8> noundef %b) #0 { 3332*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 3333*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3334*207e5cccSFangrui Song // CHECK: ret void 3335*207e5cccSFangrui Song void test_vst1q_lane_u8(uint8_t *a, uint8x16_t b) { 3336*207e5cccSFangrui Song vst1q_lane_u8(a, b, 15); 3337*207e5cccSFangrui Song } 3338*207e5cccSFangrui Song 3339*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u16(ptr noundef %a, <8 x i16> noundef %b) #0 { 3340*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3341*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3342*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 3343*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3344*207e5cccSFangrui Song // CHECK: ret void 3345*207e5cccSFangrui Song void test_vst1q_lane_u16(uint16_t *a, uint16x8_t b) { 3346*207e5cccSFangrui Song vst1q_lane_u16(a, b, 7); 3347*207e5cccSFangrui Song } 3348*207e5cccSFangrui Song 3349*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u32(ptr noundef %a, <4 x i32> noundef %b) #0 { 3350*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3351*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3352*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 3353*207e5cccSFangrui Song // CHECK: store i32 [[TMP3]], ptr %a 3354*207e5cccSFangrui Song // CHECK: ret void 3355*207e5cccSFangrui Song void 
test_vst1q_lane_u32(uint32_t *a, uint32x4_t b) { 3356*207e5cccSFangrui Song vst1q_lane_u32(a, b, 3); 3357*207e5cccSFangrui Song } 3358*207e5cccSFangrui Song 3359*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_u64(ptr noundef %a, <2 x i64> noundef %b) #0 { 3360*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3361*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3362*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 3363*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3364*207e5cccSFangrui Song // CHECK: ret void 3365*207e5cccSFangrui Song void test_vst1q_lane_u64(uint64_t *a, uint64x2_t b) { 3366*207e5cccSFangrui Song vst1q_lane_u64(a, b, 1); 3367*207e5cccSFangrui Song } 3368*207e5cccSFangrui Song 3369*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s8(ptr noundef %a, <16 x i8> noundef %b) #0 { 3370*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 3371*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3372*207e5cccSFangrui Song // CHECK: ret void 3373*207e5cccSFangrui Song void test_vst1q_lane_s8(int8_t *a, int8x16_t b) { 3374*207e5cccSFangrui Song vst1q_lane_s8(a, b, 15); 3375*207e5cccSFangrui Song } 3376*207e5cccSFangrui Song 3377*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s16(ptr noundef %a, <8 x i16> noundef %b) #0 { 3378*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3379*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3380*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 3381*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3382*207e5cccSFangrui Song // CHECK: ret void 3383*207e5cccSFangrui Song void test_vst1q_lane_s16(int16_t *a, int16x8_t b) { 3384*207e5cccSFangrui Song vst1q_lane_s16(a, b, 7); 
3385*207e5cccSFangrui Song } 3386*207e5cccSFangrui Song 3387*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s32(ptr noundef %a, <4 x i32> noundef %b) #0 { 3388*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3389*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 3390*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 3391*207e5cccSFangrui Song // CHECK: store i32 [[TMP3]], ptr %a 3392*207e5cccSFangrui Song // CHECK: ret void 3393*207e5cccSFangrui Song void test_vst1q_lane_s32(int32_t *a, int32x4_t b) { 3394*207e5cccSFangrui Song vst1q_lane_s32(a, b, 3); 3395*207e5cccSFangrui Song } 3396*207e5cccSFangrui Song 3397*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_s64(ptr noundef %a, <2 x i64> noundef %b) #0 { 3398*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3399*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3400*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 3401*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3402*207e5cccSFangrui Song // CHECK: ret void 3403*207e5cccSFangrui Song void test_vst1q_lane_s64(int64_t *a, int64x2_t b) { 3404*207e5cccSFangrui Song vst1q_lane_s64(a, b, 1); 3405*207e5cccSFangrui Song } 3406*207e5cccSFangrui Song 3407*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f16(ptr noundef %a, <8 x half> noundef %b) #0 { 3408*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 3409*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 3410*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7 3411*207e5cccSFangrui Song // CHECK: store half [[TMP3]], ptr %a 3412*207e5cccSFangrui Song // CHECK: ret void 3413*207e5cccSFangrui Song void 
test_vst1q_lane_f16(float16_t *a, float16x8_t b) { 3414*207e5cccSFangrui Song vst1q_lane_f16(a, b, 7); 3415*207e5cccSFangrui Song } 3416*207e5cccSFangrui Song 3417*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f32(ptr noundef %a, <4 x float> noundef %b) #0 { 3418*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3419*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> 3420*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 3421*207e5cccSFangrui Song // CHECK: store float [[TMP3]], ptr %a 3422*207e5cccSFangrui Song // CHECK: ret void 3423*207e5cccSFangrui Song void test_vst1q_lane_f32(float32_t *a, float32x4_t b) { 3424*207e5cccSFangrui Song vst1q_lane_f32(a, b, 3); 3425*207e5cccSFangrui Song } 3426*207e5cccSFangrui Song 3427*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_f64(ptr noundef %a, <2 x double> noundef %b) #0 { 3428*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 3429*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 3430*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 3431*207e5cccSFangrui Song // CHECK: store double [[TMP3]], ptr %a 3432*207e5cccSFangrui Song // CHECK: ret void 3433*207e5cccSFangrui Song void test_vst1q_lane_f64(float64_t *a, float64x2_t b) { 3434*207e5cccSFangrui Song vst1q_lane_f64(a, b, 1); 3435*207e5cccSFangrui Song } 3436*207e5cccSFangrui Song 3437*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p8(ptr noundef %a, <16 x i8> noundef %b) #0 { 3438*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 3439*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3440*207e5cccSFangrui Song // CHECK: ret void 3441*207e5cccSFangrui Song void test_vst1q_lane_p8(poly8_t *a, poly8x16_t b) { 3442*207e5cccSFangrui 
Song vst1q_lane_p8(a, b, 15); 3443*207e5cccSFangrui Song } 3444*207e5cccSFangrui Song 3445*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p16(ptr noundef %a, <8 x i16> noundef %b) #0 { 3446*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3447*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 3448*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7 3449*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3450*207e5cccSFangrui Song // CHECK: ret void 3451*207e5cccSFangrui Song void test_vst1q_lane_p16(poly16_t *a, poly16x8_t b) { 3452*207e5cccSFangrui Song vst1q_lane_p16(a, b, 7); 3453*207e5cccSFangrui Song } 3454*207e5cccSFangrui Song 3455*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_lane_p64(ptr noundef %a, <2 x i64> noundef %b) #0 { 3456*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3457*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3458*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1 3459*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3460*207e5cccSFangrui Song // CHECK: ret void 3461*207e5cccSFangrui Song void test_vst1q_lane_p64(poly64_t *a, poly64x2_t b) { 3462*207e5cccSFangrui Song vst1q_lane_p64(a, b, 1); 3463*207e5cccSFangrui Song } 3464*207e5cccSFangrui Song 3465*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_u8(ptr noundef %a, <8 x i8> noundef %b) #0 { 3466*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 3467*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3468*207e5cccSFangrui Song // CHECK: ret void 3469*207e5cccSFangrui Song void test_vst1_lane_u8(uint8_t *a, uint8x8_t b) { 3470*207e5cccSFangrui Song vst1_lane_u8(a, b, 7); 3471*207e5cccSFangrui Song } 3472*207e5cccSFangrui Song 3473*207e5cccSFangrui 
Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_u16(ptr noundef %a, <4 x i16> noundef %b) #0 { 3474*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3475*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3476*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 3477*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3478*207e5cccSFangrui Song // CHECK: ret void 3479*207e5cccSFangrui Song void test_vst1_lane_u16(uint16_t *a, uint16x4_t b) { 3480*207e5cccSFangrui Song vst1_lane_u16(a, b, 3); 3481*207e5cccSFangrui Song } 3482*207e5cccSFangrui Song 3483*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_u32(ptr noundef %a, <2 x i32> noundef %b) #0 { 3484*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3485*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3486*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 3487*207e5cccSFangrui Song // CHECK: store i32 [[TMP3]], ptr %a 3488*207e5cccSFangrui Song // CHECK: ret void 3489*207e5cccSFangrui Song void test_vst1_lane_u32(uint32_t *a, uint32x2_t b) { 3490*207e5cccSFangrui Song vst1_lane_u32(a, b, 1); 3491*207e5cccSFangrui Song } 3492*207e5cccSFangrui Song 3493*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_u64(ptr noundef %a, <1 x i64> noundef %b) #0 { 3494*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3495*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3496*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 3497*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3498*207e5cccSFangrui Song // CHECK: ret void 3499*207e5cccSFangrui Song void test_vst1_lane_u64(uint64_t *a, uint64x1_t b) { 3500*207e5cccSFangrui Song vst1_lane_u64(a, b, 0); 
3501*207e5cccSFangrui Song } 3502*207e5cccSFangrui Song 3503*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_s8(ptr noundef %a, <8 x i8> noundef %b) #0 { 3504*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 3505*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3506*207e5cccSFangrui Song // CHECK: ret void 3507*207e5cccSFangrui Song void test_vst1_lane_s8(int8_t *a, int8x8_t b) { 3508*207e5cccSFangrui Song vst1_lane_s8(a, b, 7); 3509*207e5cccSFangrui Song } 3510*207e5cccSFangrui Song 3511*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_s16(ptr noundef %a, <4 x i16> noundef %b) #0 { 3512*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3513*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3514*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 3515*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3516*207e5cccSFangrui Song // CHECK: ret void 3517*207e5cccSFangrui Song void test_vst1_lane_s16(int16_t *a, int16x4_t b) { 3518*207e5cccSFangrui Song vst1_lane_s16(a, b, 3); 3519*207e5cccSFangrui Song } 3520*207e5cccSFangrui Song 3521*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_s32(ptr noundef %a, <2 x i32> noundef %b) #0 { 3522*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3523*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 3524*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 3525*207e5cccSFangrui Song // CHECK: store i32 [[TMP3]], ptr %a 3526*207e5cccSFangrui Song // CHECK: ret void 3527*207e5cccSFangrui Song void test_vst1_lane_s32(int32_t *a, int32x2_t b) { 3528*207e5cccSFangrui Song vst1_lane_s32(a, b, 1); 3529*207e5cccSFangrui Song } 3530*207e5cccSFangrui Song 3531*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void 
@test_vst1_lane_s64(ptr noundef %a, <1 x i64> noundef %b) #0 { 3532*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3533*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3534*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 3535*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3536*207e5cccSFangrui Song // CHECK: ret void 3537*207e5cccSFangrui Song void test_vst1_lane_s64(int64_t *a, int64x1_t b) { 3538*207e5cccSFangrui Song vst1_lane_s64(a, b, 0); 3539*207e5cccSFangrui Song } 3540*207e5cccSFangrui Song 3541*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_f16(ptr noundef %a, <4 x half> noundef %b) #0 { 3542*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 3543*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 3544*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3 3545*207e5cccSFangrui Song // CHECK: store half [[TMP3]], ptr %a 3546*207e5cccSFangrui Song // CHECK: ret void 3547*207e5cccSFangrui Song void test_vst1_lane_f16(float16_t *a, float16x4_t b) { 3548*207e5cccSFangrui Song vst1_lane_f16(a, b, 3); 3549*207e5cccSFangrui Song } 3550*207e5cccSFangrui Song 3551*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_f32(ptr noundef %a, <2 x float> noundef %b) #0 { 3552*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3553*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 3554*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 3555*207e5cccSFangrui Song // CHECK: store float [[TMP3]], ptr %a 3556*207e5cccSFangrui Song // CHECK: ret void 3557*207e5cccSFangrui Song void test_vst1_lane_f32(float32_t *a, float32x2_t b) { 3558*207e5cccSFangrui Song vst1_lane_f32(a, b, 1); 3559*207e5cccSFangrui Song } 
3560*207e5cccSFangrui Song 3561*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_f64(ptr noundef %a, <1 x double> noundef %b) #0 { 3562*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 3563*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 3564*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <1 x double> [[TMP2]], i32 0 3565*207e5cccSFangrui Song // CHECK: store double [[TMP3]], ptr %a 3566*207e5cccSFangrui Song // CHECK: ret void 3567*207e5cccSFangrui Song void test_vst1_lane_f64(float64_t *a, float64x1_t b) { 3568*207e5cccSFangrui Song vst1_lane_f64(a, b, 0); 3569*207e5cccSFangrui Song } 3570*207e5cccSFangrui Song 3571*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_p8(ptr noundef %a, <8 x i8> noundef %b) #0 { 3572*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 3573*207e5cccSFangrui Song // CHECK: store i8 [[TMP0]], ptr %a 3574*207e5cccSFangrui Song // CHECK: ret void 3575*207e5cccSFangrui Song void test_vst1_lane_p8(poly8_t *a, poly8x8_t b) { 3576*207e5cccSFangrui Song vst1_lane_p8(a, b, 7); 3577*207e5cccSFangrui Song } 3578*207e5cccSFangrui Song 3579*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_lane_p16(ptr noundef %a, <4 x i16> noundef %b) #0 { 3580*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3581*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 3582*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3 3583*207e5cccSFangrui Song // CHECK: store i16 [[TMP3]], ptr %a 3584*207e5cccSFangrui Song // CHECK: ret void 3585*207e5cccSFangrui Song void test_vst1_lane_p16(poly16_t *a, poly16x4_t b) { 3586*207e5cccSFangrui Song vst1_lane_p16(a, b, 3); 3587*207e5cccSFangrui Song } 3588*207e5cccSFangrui Song 3589*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void 
@test_vst1_lane_p64(ptr noundef %a, <1 x i64> noundef %b) #0 { 3590*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3591*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3592*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 3593*207e5cccSFangrui Song // CHECK: store i64 [[TMP3]], ptr %a 3594*207e5cccSFangrui Song // CHECK: ret void 3595*207e5cccSFangrui Song void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) { 3596*207e5cccSFangrui Song vst1_lane_p64(a, b, 0); 3597*207e5cccSFangrui Song } 3598*207e5cccSFangrui Song 3599*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 { 3600*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 3601*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 3602*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0 3603*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3604*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3605*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 3606*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 3607*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 3608*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 3609*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 3610*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], 
align 16 3611*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a) 3612*207e5cccSFangrui Song // CHECK: ret void 3613*207e5cccSFangrui Song void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) { 3614*207e5cccSFangrui Song vst2q_lane_u8(a, b, 15); 3615*207e5cccSFangrui Song } 3616*207e5cccSFangrui Song 3617*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 { 3618*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 3619*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 3620*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0 3621*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3622*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3623*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 3624*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 3625*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 3626*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 3627*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 3628*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 3629*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 3630*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 3631*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = 
bitcast <16 x i8> [[TMP4]] to <8 x i16> 3632*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 3633*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a) 3634*207e5cccSFangrui Song // CHECK: ret void 3635*207e5cccSFangrui Song void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) { 3636*207e5cccSFangrui Song vst2q_lane_u16(a, b, 7); 3637*207e5cccSFangrui Song } 3638*207e5cccSFangrui Song 3639*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 { 3640*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 3641*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 3642*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0 3643*207e5cccSFangrui Song // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3644*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3645*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 3646*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 3647*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 3648*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 3649*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 3650*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 3651*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 
3652*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 3653*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 3654*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 3655*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, ptr %a) 3656*207e5cccSFangrui Song // CHECK: ret void 3657*207e5cccSFangrui Song void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) { 3658*207e5cccSFangrui Song vst2q_lane_u32(a, b, 3); 3659*207e5cccSFangrui Song } 3660*207e5cccSFangrui Song 3661*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 3662*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 3663*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 3664*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0 3665*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3666*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3667*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 3668*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 3669*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 3670*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 3671*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 3672*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], 
ptr [[VAL1]], i64 0, i64 1 3673*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 3674*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 3675*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 3676*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 3677*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a) 3678*207e5cccSFangrui Song // CHECK: ret void 3679*207e5cccSFangrui Song void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) { 3680*207e5cccSFangrui Song vst2q_lane_u64(a, b, 1); 3681*207e5cccSFangrui Song } 3682*207e5cccSFangrui Song 3683*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 { 3684*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 3685*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 3686*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0 3687*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3688*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3689*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 3690*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 3691*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 3692*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 3693*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr 
inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 3694*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 3695*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a) 3696*207e5cccSFangrui Song // CHECK: ret void 3697*207e5cccSFangrui Song void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) { 3698*207e5cccSFangrui Song vst2q_lane_s8(a, b, 15); 3699*207e5cccSFangrui Song } 3700*207e5cccSFangrui Song 3701*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 { 3702*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 3703*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 3704*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0 3705*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3706*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3707*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 3708*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 3709*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 3710*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 3711*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 3712*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 3713*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 
3714*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 3715*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 3716*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 3717*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a) 3718*207e5cccSFangrui Song // CHECK: ret void 3719*207e5cccSFangrui Song void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) { 3720*207e5cccSFangrui Song vst2q_lane_s16(a, b, 7); 3721*207e5cccSFangrui Song } 3722*207e5cccSFangrui Song 3723*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 { 3724*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 3725*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 3726*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0 3727*207e5cccSFangrui Song // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3728*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3729*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 3730*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 3731*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 3732*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 3733*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 3734*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr 
[[VAL1]], i64 0, i64 1 3735*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 3736*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 3737*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 3738*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 3739*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], i64 3, ptr %a) 3740*207e5cccSFangrui Song // CHECK: ret void 3741*207e5cccSFangrui Song void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) { 3742*207e5cccSFangrui Song vst2q_lane_s32(a, b, 3); 3743*207e5cccSFangrui Song } 3744*207e5cccSFangrui Song 3745*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 3746*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 3747*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 3748*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0 3749*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3750*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3751*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 3752*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 3753*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 3754*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 3755*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr 
[[__S1]], i32 0, i32 0 3756*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 3757*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 3758*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 3759*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 3760*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 3761*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a) 3762*207e5cccSFangrui Song // CHECK: ret void 3763*207e5cccSFangrui Song void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) { 3764*207e5cccSFangrui Song vst2q_lane_s64(a, b, 1); 3765*207e5cccSFangrui Song } 3766*207e5cccSFangrui Song 3767*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 { 3768*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 3769*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 3770*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0 3771*207e5cccSFangrui Song // CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3772*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3773*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 3774*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0 3775*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 3776*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x 
half> [[TMP3]] to <16 x i8> 3777*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 3778*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 3779*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 3780*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 3781*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> 3782*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> 3783*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], i64 7, ptr %a) 3784*207e5cccSFangrui Song // CHECK: ret void 3785*207e5cccSFangrui Song void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) { 3786*207e5cccSFangrui Song vst2q_lane_f16(a, b, 7); 3787*207e5cccSFangrui Song } 3788*207e5cccSFangrui Song 3789*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 { 3790*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 3791*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 3792*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0 3793*207e5cccSFangrui Song // CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3794*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3795*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 3796*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0 
3797*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 3798*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 3799*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 3800*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 3801*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 3802*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 3803*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 3804*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 3805*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], i64 3, ptr %a) 3806*207e5cccSFangrui Song // CHECK: ret void 3807*207e5cccSFangrui Song void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) { 3808*207e5cccSFangrui Song vst2q_lane_f32(a, b, 3); 3809*207e5cccSFangrui Song } 3810*207e5cccSFangrui Song 3811*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 { 3812*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 3813*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 3814*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0 3815*207e5cccSFangrui Song // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3816*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3817*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw 
%struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 3818*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0 3819*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 3820*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 3821*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 3822*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 3823*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 3824*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 3825*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 3826*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 3827*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], i64 1, ptr %a) 3828*207e5cccSFangrui Song // CHECK: ret void 3829*207e5cccSFangrui Song void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) { 3830*207e5cccSFangrui Song vst2q_lane_f64(a, b, 1); 3831*207e5cccSFangrui Song } 3832*207e5cccSFangrui Song 3833*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 { 3834*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 3835*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 3836*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0 3837*207e5cccSFangrui Song // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3838*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3839*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 3840*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 3841*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 3842*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 3843*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 3844*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 3845*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, ptr %a) 3846*207e5cccSFangrui Song // CHECK: ret void 3847*207e5cccSFangrui Song void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) { 3848*207e5cccSFangrui Song vst2q_lane_p8(a, b, 15); 3849*207e5cccSFangrui Song } 3850*207e5cccSFangrui Song 3851*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 { 3852*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 3853*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 3854*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0 3855*207e5cccSFangrui Song // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3856*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3857*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 3858*207e5cccSFangrui Song // 
CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 3859*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 3860*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 3861*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 3862*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 3863*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 3864*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 3865*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 3866*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 3867*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], i64 7, ptr %a) 3868*207e5cccSFangrui Song // CHECK: ret void 3869*207e5cccSFangrui Song void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) { 3870*207e5cccSFangrui Song vst2q_lane_p16(a, b, 7); 3871*207e5cccSFangrui Song } 3872*207e5cccSFangrui Song 3873*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 { 3874*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16 3875*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 3876*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0 3877*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 3878*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 3879*207e5cccSFangrui Song // 
CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 3880*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 3881*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 3882*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 3883*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 3884*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 3885*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 3886*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 3887*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 3888*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 3889*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i64 1, ptr %a) 3890*207e5cccSFangrui Song // CHECK: ret void 3891*207e5cccSFangrui Song void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) { 3892*207e5cccSFangrui Song vst2q_lane_p64(a, b, 1); 3893*207e5cccSFangrui Song } 3894*207e5cccSFangrui Song 3895*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 3896*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 3897*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 3898*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0 3899*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3900*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 3901*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 3902*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 3903*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 3904*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 3905*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 3906*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 3907*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a) 3908*207e5cccSFangrui Song // CHECK: ret void 3909*207e5cccSFangrui Song void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) { 3910*207e5cccSFangrui Song vst2_lane_u8(a, b, 7); 3911*207e5cccSFangrui Song } 3912*207e5cccSFangrui Song 3913*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 3914*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 3915*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 3916*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0 3917*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3918*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 3919*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 3920*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 3921*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 3922*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 3923*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 3924*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 3925*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 3926*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 3927*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 3928*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 3929*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a) 3930*207e5cccSFangrui Song // CHECK: ret void 3931*207e5cccSFangrui Song void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) { 3932*207e5cccSFangrui Song vst2_lane_u16(a, b, 3); 3933*207e5cccSFangrui Song } 3934*207e5cccSFangrui Song 3935*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 { 3936*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 3937*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 3938*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0 3939*207e5cccSFangrui Song // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3940*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 3941*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds 
nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 3942*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 3943*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 3944*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 3945*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 3946*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 3947*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 3948*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 3949*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 3950*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 3951*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, ptr %a) 3952*207e5cccSFangrui Song // CHECK: ret void 3953*207e5cccSFangrui Song void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) { 3954*207e5cccSFangrui Song vst2_lane_u32(a, b, 1); 3955*207e5cccSFangrui Song } 3956*207e5cccSFangrui Song 3957*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 3958*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 3959*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 3960*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0 3961*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 3962*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], 
ptr align 8 [[B]], i64 16, i1 false) 3963*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 3964*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 3965*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 3966*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 3967*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 3968*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 3969*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 3970*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 3971*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 3972*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 3973*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a) 3974*207e5cccSFangrui Song // CHECK: ret void 3975*207e5cccSFangrui Song void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) { 3976*207e5cccSFangrui Song vst2_lane_u64(a, b, 0); 3977*207e5cccSFangrui Song } 3978*207e5cccSFangrui Song 3979*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 3980*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 3981*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 3982*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0 3983*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], 
align 8 3984*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 3985*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 3986*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 3987*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 3988*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 3989*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 3990*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 3991*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a) 3992*207e5cccSFangrui Song // CHECK: ret void 3993*207e5cccSFangrui Song void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) { 3994*207e5cccSFangrui Song vst2_lane_s8(a, b, 7); 3995*207e5cccSFangrui Song } 3996*207e5cccSFangrui Song 3997*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 3998*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 3999*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 4000*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0 4001*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4002*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4003*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 
4004*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 4005*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 4006*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 4007*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 4008*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 4009*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 4010*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 4011*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 4012*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 4013*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a) 4014*207e5cccSFangrui Song // CHECK: ret void 4015*207e5cccSFangrui Song void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) { 4016*207e5cccSFangrui Song vst2_lane_s16(a, b, 3); 4017*207e5cccSFangrui Song } 4018*207e5cccSFangrui Song 4019*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 { 4020*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 4021*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 4022*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0 4023*207e5cccSFangrui Song // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4024*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4025*207e5cccSFangrui 
Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 4026*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 4027*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 4028*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 4029*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 4030*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 4031*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 4032*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 4033*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 4034*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 4035*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], i64 1, ptr %a) 4036*207e5cccSFangrui Song // CHECK: ret void 4037*207e5cccSFangrui Song void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) { 4038*207e5cccSFangrui Song vst2_lane_s32(a, b, 1); 4039*207e5cccSFangrui Song } 4040*207e5cccSFangrui Song 4041*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 4042*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 4043*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 4044*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0 4045*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4046*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4047*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 4048*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 4049*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 4050*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 4051*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 4052*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 4053*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 4054*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 4055*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 4056*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 4057*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a) 4058*207e5cccSFangrui Song // CHECK: ret void 4059*207e5cccSFangrui Song void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) { 4060*207e5cccSFangrui Song vst2_lane_s64(a, b, 0); 4061*207e5cccSFangrui Song } 4062*207e5cccSFangrui Song 4063*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 { 4064*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 4065*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 4066*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0 4067*207e5cccSFangrui Song // CHECK: 
store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4068*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4069*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 4070*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0 4071*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 4072*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 4073*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 4074*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 4075*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 4076*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 4077*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> 4078*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> 4079*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], i64 3, ptr %a) 4080*207e5cccSFangrui Song // CHECK: ret void 4081*207e5cccSFangrui Song void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) { 4082*207e5cccSFangrui Song vst2_lane_f16(a, b, 3); 4083*207e5cccSFangrui Song } 4084*207e5cccSFangrui Song 4085*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 { 4086*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 4087*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 4088*207e5cccSFangrui Song // CHECK: 
[[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0 4089*207e5cccSFangrui Song // CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4090*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4091*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 4092*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0 4093*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 4094*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 4095*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 4096*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 4097*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 4098*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 4099*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 4100*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 4101*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], i64 1, ptr %a) 4102*207e5cccSFangrui Song // CHECK: ret void 4103*207e5cccSFangrui Song void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) { 4104*207e5cccSFangrui Song vst2_lane_f32(a, b, 1); 4105*207e5cccSFangrui Song } 4106*207e5cccSFangrui Song 4107*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 { 4108*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca 
%struct.float64x1x2_t, align 8 4109*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 4110*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0 4111*207e5cccSFangrui Song // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4112*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4113*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 4114*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0 4115*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 4116*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 4117*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 4118*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 4119*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 4120*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 4121*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 4122*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 4123*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], i64 0, ptr %a) 4124*207e5cccSFangrui Song // CHECK: ret void 4125*207e5cccSFangrui Song void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) { 4126*207e5cccSFangrui Song vst2_lane_f64(a, b, 0); 4127*207e5cccSFangrui Song } 4128*207e5cccSFangrui Song 4129*207e5cccSFangrui Song // CHECK-LABEL: 
define{{.*}} void @test_vst2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 { 4130*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 4131*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 4132*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0 4133*207e5cccSFangrui Song // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4134*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4135*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 4136*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 4137*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 4138*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 4139*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 4140*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 4141*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, ptr %a) 4142*207e5cccSFangrui Song // CHECK: ret void 4143*207e5cccSFangrui Song void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) { 4144*207e5cccSFangrui Song vst2_lane_p8(a, b, 7); 4145*207e5cccSFangrui Song } 4146*207e5cccSFangrui Song 4147*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 { 4148*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 4149*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 
4150*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0 4151*207e5cccSFangrui Song // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4152*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4153*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 4154*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 4155*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 4156*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 4157*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 4158*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 4159*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 4160*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 4161*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 4162*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 4163*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], i64 3, ptr %a) 4164*207e5cccSFangrui Song // CHECK: ret void 4165*207e5cccSFangrui Song void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) { 4166*207e5cccSFangrui Song vst2_lane_p16(a, b, 3); 4167*207e5cccSFangrui Song } 4168*207e5cccSFangrui Song 4169*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 { 4170*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca 
%struct.poly64x1x2_t, align 8 4171*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 4172*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0 4173*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4174*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 4175*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 4176*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 4177*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 4178*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 4179*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 4180*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 4181*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 4182*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 4183*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 4184*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 4185*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i64 0, ptr %a) 4186*207e5cccSFangrui Song // CHECK: ret void 4187*207e5cccSFangrui Song void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) { 4188*207e5cccSFangrui Song vst2_lane_p64(a, b, 0); 4189*207e5cccSFangrui Song } 4190*207e5cccSFangrui Song 4191*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u8(ptr 
noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 { 4192*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 4193*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 4194*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0 4195*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4196*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4197*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 4198*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 4199*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 4200*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 4201*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 4202*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 4203*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 4204*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 4205*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 4206*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %a) 4207*207e5cccSFangrui Song // CHECK: ret void 4208*207e5cccSFangrui Song void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) { 4209*207e5cccSFangrui Song vst3q_lane_u8(a, b, 15); 
4210*207e5cccSFangrui Song } 4211*207e5cccSFangrui Song 4212*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 4213*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 4214*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 4215*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0 4216*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4217*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4218*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 4219*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 4220*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 4221*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 4222*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 4223*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 4224*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 4225*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 4226*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 4227*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 4228*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 4229*207e5cccSFangrui Song // 
CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 4230*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 4231*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 4232*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 4233*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a) 4234*207e5cccSFangrui Song // CHECK: ret void 4235*207e5cccSFangrui Song void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) { 4236*207e5cccSFangrui Song vst3q_lane_u16(a, b, 7); 4237*207e5cccSFangrui Song } 4238*207e5cccSFangrui Song 4239*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 { 4240*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 4241*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 4242*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0 4243*207e5cccSFangrui Song // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4244*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4245*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 4246*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 4247*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 4248*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 4249*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 
4250*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 4251*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 4252*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 4253*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 4254*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 4255*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 4256*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 4257*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 4258*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 4259*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 4260*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, ptr %a) 4261*207e5cccSFangrui Song // CHECK: ret void 4262*207e5cccSFangrui Song void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) { 4263*207e5cccSFangrui Song vst3q_lane_u32(a, b, 3); 4264*207e5cccSFangrui Song } 4265*207e5cccSFangrui Song 4266*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 4267*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 4268*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 4269*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0 4270*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 
4271*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4272*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 4273*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 4274*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 4275*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 4276*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 4277*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 4278*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 4279*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 4280*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 4281*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 4282*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 4283*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 4284*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 4285*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 4286*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 4287*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, ptr %a) 4288*207e5cccSFangrui Song // CHECK: ret void 4289*207e5cccSFangrui Song void test_vst3q_lane_u64(uint64_t *a, 
uint64x2x3_t b) { 4290*207e5cccSFangrui Song vst3q_lane_u64(a, b, 1); 4291*207e5cccSFangrui Song } 4292*207e5cccSFangrui Song 4293*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 { 4294*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 4295*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 4296*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0 4297*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4298*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4299*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 4300*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 4301*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 4302*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 4303*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 4304*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 4305*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 4306*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 4307*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 4308*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, 
ptr %a) 4309*207e5cccSFangrui Song // CHECK: ret void 4310*207e5cccSFangrui Song void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) { 4311*207e5cccSFangrui Song vst3q_lane_s8(a, b, 15); 4312*207e5cccSFangrui Song } 4313*207e5cccSFangrui Song 4314*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 4315*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 4316*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 4317*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0 4318*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4319*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4320*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 4321*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 4322*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 4323*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 4324*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 4325*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 4326*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 4327*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 4328*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 4329*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 4330*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 4331*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 4332*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 4333*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 4334*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 4335*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a) 4336*207e5cccSFangrui Song // CHECK: ret void 4337*207e5cccSFangrui Song void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) { 4338*207e5cccSFangrui Song vst3q_lane_s16(a, b, 7); 4339*207e5cccSFangrui Song } 4340*207e5cccSFangrui Song 4341*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 { 4342*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 4343*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 4344*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0 4345*207e5cccSFangrui Song // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4346*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4347*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 4348*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 4349*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 4350*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = 
bitcast <4 x i32> [[TMP3]] to <16 x i8> 4351*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 4352*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 4353*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 4354*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 4355*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 4356*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 4357*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 4358*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 4359*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 4360*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 4361*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 4362*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, ptr %a) 4363*207e5cccSFangrui Song // CHECK: ret void 4364*207e5cccSFangrui Song void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) { 4365*207e5cccSFangrui Song vst3q_lane_s32(a, b, 3); 4366*207e5cccSFangrui Song } 4367*207e5cccSFangrui Song 4368*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 4369*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 4370*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 4371*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw 
%struct.int64x2x3_t, ptr [[B]], i32 0, i32 0 4372*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4373*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4374*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 4375*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 4376*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 4377*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 4378*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 4379*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 4380*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 4381*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 4382*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 4383*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 4384*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 4385*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 4386*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 4387*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 4388*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 4389*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x 
i64> [[TMP11]], i64 1, ptr %a) 4390*207e5cccSFangrui Song // CHECK: ret void 4391*207e5cccSFangrui Song void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) { 4392*207e5cccSFangrui Song vst3q_lane_s64(a, b, 1); 4393*207e5cccSFangrui Song } 4394*207e5cccSFangrui Song 4395*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 { 4396*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 4397*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 4398*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0 4399*207e5cccSFangrui Song // CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4400*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4401*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 4402*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0 4403*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 4404*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 4405*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 4406*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 4407*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 4408*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 4409*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 4410*207e5cccSFangrui Song 
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 4411*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 4412*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 4413*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> 4414*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> 4415*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half> 4416*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], i64 7, ptr %a) 4417*207e5cccSFangrui Song // CHECK: ret void 4418*207e5cccSFangrui Song void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) { 4419*207e5cccSFangrui Song vst3q_lane_f16(a, b, 7); 4420*207e5cccSFangrui Song } 4421*207e5cccSFangrui Song 4422*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 { 4423*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 4424*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 4425*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0 4426*207e5cccSFangrui Song // CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4427*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4428*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 4429*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0 4430*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr 
[[ARRAYIDX]], align 16 4431*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 4432*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 4433*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 4434*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 4435*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 4436*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 4437*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 4438*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 4439*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 4440*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 4441*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 4442*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 4443*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, ptr %a) 4444*207e5cccSFangrui Song // CHECK: ret void 4445*207e5cccSFangrui Song void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) { 4446*207e5cccSFangrui Song vst3q_lane_f32(a, b, 3); 4447*207e5cccSFangrui Song } 4448*207e5cccSFangrui Song 4449*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 { 4450*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 4451*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca 
%struct.float64x2x3_t, align 16 4452*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 4453*207e5cccSFangrui Song // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4454*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4455*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 4456*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 4457*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 4458*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 4459*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 4460*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 4461*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 4462*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 4463*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 4464*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 4465*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 4466*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 4467*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 4468*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 4469*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast 
<16 x i8> [[TMP8]] to <2 x double> 4470*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, ptr %a) 4471*207e5cccSFangrui Song // CHECK: ret void 4472*207e5cccSFangrui Song void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) { 4473*207e5cccSFangrui Song vst3q_lane_f64(a, b, 1); 4474*207e5cccSFangrui Song } 4475*207e5cccSFangrui Song 4476*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 { 4477*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 4478*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 4479*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0 4480*207e5cccSFangrui Song // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4481*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4482*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 4483*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 4484*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 4485*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 4486*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 4487*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 4488*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 4489*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 4490*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 4491*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, ptr %a) 4492*207e5cccSFangrui Song // CHECK: ret void 4493*207e5cccSFangrui Song void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) { 4494*207e5cccSFangrui Song vst3q_lane_p8(a, b, 15); 4495*207e5cccSFangrui Song } 4496*207e5cccSFangrui Song 4497*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 { 4498*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 4499*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 4500*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0 4501*207e5cccSFangrui Song // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4502*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4503*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 4504*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 4505*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 4506*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 4507*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 4508*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 4509*207e5cccSFangrui Song // CHECK: 
[[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 4510*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 4511*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 4512*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 4513*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 4514*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 4515*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 4516*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 4517*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 4518*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, ptr %a) 4519*207e5cccSFangrui Song // CHECK: ret void 4520*207e5cccSFangrui Song void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) { 4521*207e5cccSFangrui Song vst3q_lane_p16(a, b, 7); 4522*207e5cccSFangrui Song } 4523*207e5cccSFangrui Song 4524*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 { 4525*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 4526*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 4527*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0 4528*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4529*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 4530*207e5cccSFangrui Song // CHECK: 
[[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 4531*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 4532*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 4533*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 4534*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 4535*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 4536*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 4537*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 4538*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 4539*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 4540*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 4541*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 4542*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 4543*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 4544*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 4545*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, ptr %a) 4546*207e5cccSFangrui Song // CHECK: ret void 4547*207e5cccSFangrui Song void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) { 4548*207e5cccSFangrui Song vst3q_lane_p64(a, b, 1); 4549*207e5cccSFangrui Song } 4550*207e5cccSFangrui Song 4551*207e5cccSFangrui Song // CHECK-LABEL: 
define{{.*}} void @test_vst3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 4552*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 4553*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 4554*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0 4555*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4556*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4557*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 4558*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 4559*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 4560*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 4561*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 4562*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 4563*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 4564*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 4565*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 4566*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 4567*207e5cccSFangrui Song // CHECK: ret void 4568*207e5cccSFangrui Song void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) { 4569*207e5cccSFangrui Song vst3_lane_u8(a, b, 7); 
4570*207e5cccSFangrui Song } 4571*207e5cccSFangrui Song 4572*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_u16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 4573*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 4574*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 4575*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0 4576*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4577*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4578*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 4579*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 4580*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 4581*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 4582*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 4583*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 4584*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 4585*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 4586*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 4587*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 4588*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 4589*207e5cccSFangrui Song // CHECK: 
[[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 4590*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 4591*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 4592*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 4593*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a) 4594*207e5cccSFangrui Song // CHECK: ret void 4595*207e5cccSFangrui Song void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) { 4596*207e5cccSFangrui Song vst3_lane_u16(a, b, 3); 4597*207e5cccSFangrui Song } 4598*207e5cccSFangrui Song 4599*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 { 4600*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 4601*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 4602*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0 4603*207e5cccSFangrui Song // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4604*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4605*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 4606*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 4607*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 4608*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 4609*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 4610*207e5cccSFangrui Song // 
CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 4611*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 4612*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 4613*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 4614*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 4615*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 4616*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 4617*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 4618*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 4619*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 4620*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, ptr %a) 4621*207e5cccSFangrui Song // CHECK: ret void 4622*207e5cccSFangrui Song void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) { 4623*207e5cccSFangrui Song vst3_lane_u32(a, b, 1); 4624*207e5cccSFangrui Song } 4625*207e5cccSFangrui Song 4626*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 4627*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 4628*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 4629*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0 4630*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4631*207e5cccSFangrui Song // CHECK: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4632*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 4633*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 4634*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 4635*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 4636*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 4637*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 4638*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 4639*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 4640*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 4641*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 4642*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 4643*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 4644*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 4645*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 4646*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 4647*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a) 4648*207e5cccSFangrui Song // CHECK: ret void 4649*207e5cccSFangrui Song void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) { 4650*207e5cccSFangrui Song vst3_lane_u64(a, 
b, 0); 4651*207e5cccSFangrui Song } 4652*207e5cccSFangrui Song 4653*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 4654*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 4655*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 4656*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0 4657*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4658*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4659*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 4660*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 4661*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 4662*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 4663*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 4664*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 4665*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 4666*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 4667*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 4668*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 4669*207e5cccSFangrui Song // CHECK: ret void 4670*207e5cccSFangrui Song void 
test_vst3_lane_s8(int8_t *a, int8x8x3_t b) { 4671*207e5cccSFangrui Song vst3_lane_s8(a, b, 7); 4672*207e5cccSFangrui Song } 4673*207e5cccSFangrui Song 4674*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 4675*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 4676*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 4677*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0 4678*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4679*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4680*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 4681*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 4682*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 4683*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 4684*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 4685*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 4686*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 4687*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 4688*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 4689*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 4690*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] 
= load <4 x i16>, ptr [[ARRAYIDX4]], align 8 4691*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 4692*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 4693*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 4694*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 4695*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a) 4696*207e5cccSFangrui Song // CHECK: ret void 4697*207e5cccSFangrui Song void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) { 4698*207e5cccSFangrui Song vst3_lane_s16(a, b, 3); 4699*207e5cccSFangrui Song } 4700*207e5cccSFangrui Song 4701*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 { 4702*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 4703*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 4704*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0 4705*207e5cccSFangrui Song // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4706*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4707*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 4708*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 4709*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 4710*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 4711*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw 
%struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 4712*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 4713*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 4714*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 4715*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 4716*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 4717*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 4718*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 4719*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 4720*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 4721*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 4722*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, ptr %a) 4723*207e5cccSFangrui Song // CHECK: ret void 4724*207e5cccSFangrui Song void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) { 4725*207e5cccSFangrui Song vst3_lane_s32(a, b, 1); 4726*207e5cccSFangrui Song } 4727*207e5cccSFangrui Song 4728*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 4729*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 4730*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 4731*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0 4732*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr 
[[COERCE_DIVE]], align 8 4733*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4734*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 4735*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 4736*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 4737*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 4738*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 4739*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 4740*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 4741*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 4742*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 4743*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 4744*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 4745*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 4746*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 4747*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 4748*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 4749*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a) 4750*207e5cccSFangrui Song // CHECK: ret void 4751*207e5cccSFangrui Song void 
test_vst3_lane_s64(int64_t *a, int64x1x3_t b) { 4752*207e5cccSFangrui Song vst3_lane_s64(a, b, 0); 4753*207e5cccSFangrui Song } 4754*207e5cccSFangrui Song 4755*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 { 4756*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 4757*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 4758*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0 4759*207e5cccSFangrui Song // CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4760*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4761*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 4762*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0 4763*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 4764*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 4765*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 4766*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 4767*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 4768*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 4769*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 4770*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 4771*207e5cccSFangrui 
Song // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 4772*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 4773*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> 4774*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> 4775*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half> 4776*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], i64 3, ptr %a) 4777*207e5cccSFangrui Song // CHECK: ret void 4778*207e5cccSFangrui Song void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) { 4779*207e5cccSFangrui Song vst3_lane_f16(a, b, 3); 4780*207e5cccSFangrui Song } 4781*207e5cccSFangrui Song 4782*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 { 4783*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 4784*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 4785*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0 4786*207e5cccSFangrui Song // CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4787*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4788*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 4789*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0 4790*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 4791*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 4792*207e5cccSFangrui Song 
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 4793*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 4794*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 4795*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 4796*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 4797*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 4798*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 4799*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 4800*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 4801*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 4802*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 4803*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, ptr %a) 4804*207e5cccSFangrui Song // CHECK: ret void 4805*207e5cccSFangrui Song void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) { 4806*207e5cccSFangrui Song vst3_lane_f32(a, b, 1); 4807*207e5cccSFangrui Song } 4808*207e5cccSFangrui Song 4809*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 { 4810*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 4811*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 4812*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, 
i32 0 4813*207e5cccSFangrui Song // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4814*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4815*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 4816*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 4817*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 4818*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 4819*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 4820*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 4821*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 4822*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 4823*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 4824*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 4825*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 4826*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 4827*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 4828*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 4829*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 4830*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x 
double> [[TMP11]], i64 0, ptr %a) 4831*207e5cccSFangrui Song // CHECK: ret void 4832*207e5cccSFangrui Song void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) { 4833*207e5cccSFangrui Song vst3_lane_f64(a, b, 0); 4834*207e5cccSFangrui Song } 4835*207e5cccSFangrui Song 4836*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 { 4837*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 4838*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 4839*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0 4840*207e5cccSFangrui Song // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4841*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4842*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 4843*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 4844*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 4845*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 4846*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 4847*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 4848*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 4849*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 4850*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 4851*207e5cccSFangrui 
Song // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, ptr %a) 4852*207e5cccSFangrui Song // CHECK: ret void 4853*207e5cccSFangrui Song void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) { 4854*207e5cccSFangrui Song vst3_lane_p8(a, b, 7); 4855*207e5cccSFangrui Song } 4856*207e5cccSFangrui Song 4857*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 { 4858*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 4859*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 4860*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0 4861*207e5cccSFangrui Song // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4862*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4863*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 4864*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 4865*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 4866*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 4867*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 4868*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 4869*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 4870*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 4871*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw 
%struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 4872*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 4873*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 4874*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 4875*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 4876*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 4877*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 4878*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, ptr %a) 4879*207e5cccSFangrui Song // CHECK: ret void 4880*207e5cccSFangrui Song void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) { 4881*207e5cccSFangrui Song vst3_lane_p16(a, b, 3); 4882*207e5cccSFangrui Song } 4883*207e5cccSFangrui Song 4884*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 { 4885*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 4886*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 4887*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0 4888*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 4889*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 4890*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 4891*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 4892*207e5cccSFangrui Song // CHECK: 
[[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 4893*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 4894*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 4895*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 4896*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 4897*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 4898*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 4899*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 4900*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 4901*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 4902*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 4903*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 4904*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 4905*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, ptr %a) 4906*207e5cccSFangrui Song // CHECK: ret void 4907*207e5cccSFangrui Song void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) { 4908*207e5cccSFangrui Song vst3_lane_p64(a, b, 0); 4909*207e5cccSFangrui Song } 4910*207e5cccSFangrui Song 4911*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 4912*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 4913*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca 
%struct.uint8x16x4_t, align 16 4914*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0 4915*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4916*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 4917*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 4918*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 4919*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 4920*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 4921*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 4922*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 4923*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 4924*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 4925*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 4926*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 4927*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 4928*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 4929*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a) 4930*207e5cccSFangrui Song // 
CHECK: ret void 4931*207e5cccSFangrui Song void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) { 4932*207e5cccSFangrui Song vst4q_lane_u8(a, b, 15); 4933*207e5cccSFangrui Song } 4934*207e5cccSFangrui Song 4935*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 4936*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 4937*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 4938*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0 4939*207e5cccSFangrui Song // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 4940*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 4941*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 4942*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 4943*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 4944*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 4945*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 4946*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 4947*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 4948*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 4949*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 4950*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr 
[[VAL3]], i64 0, i64 2 4951*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 4952*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 4953*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 4954*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 4955*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 4956*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 4957*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 4958*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 4959*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 4960*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 4961*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a) 4962*207e5cccSFangrui Song // CHECK: ret void 4963*207e5cccSFangrui Song void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) { 4964*207e5cccSFangrui Song vst4q_lane_u16(a, b, 7); 4965*207e5cccSFangrui Song } 4966*207e5cccSFangrui Song 4967*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 { 4968*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 4969*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 4970*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0 4971*207e5cccSFangrui Song // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 
16 4972*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 4973*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 4974*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 4975*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 4976*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 4977*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 4978*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 4979*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 4980*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 4981*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 4982*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 4983*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 4984*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 4985*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 4986*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 4987*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 4988*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 4989*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 
4990*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 4991*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 4992*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 4993*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, ptr %a) 4994*207e5cccSFangrui Song // CHECK: ret void 4995*207e5cccSFangrui Song void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) { 4996*207e5cccSFangrui Song vst4q_lane_u32(a, b, 3); 4997*207e5cccSFangrui Song } 4998*207e5cccSFangrui Song 4999*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 5000*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 5001*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 5002*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0 5003*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5004*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5005*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 5006*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 5007*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 5008*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 5009*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 5010*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 5011*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 5012*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5013*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 5014*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 5015*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 5016*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 5017*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 5018*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 5019*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 5020*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 5021*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5022*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5023*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 5024*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 5025*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a) 5026*207e5cccSFangrui Song // CHECK: ret void 5027*207e5cccSFangrui Song void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) { 5028*207e5cccSFangrui Song vst4q_lane_u64(a, b, 1); 5029*207e5cccSFangrui Song } 5030*207e5cccSFangrui Song 5031*207e5cccSFangrui Song // CHECK-LABEL: 
define{{.*}} void @test_vst4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 5032*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 5033*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 5034*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0 5035*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5036*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5037*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 5038*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 5039*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 5040*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 5041*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 5042*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 5043*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 5044*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 5045*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 5046*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 5047*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 5048*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, 
ptr [[ARRAYIDX6]], align 16 5049*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a) 5050*207e5cccSFangrui Song // CHECK: ret void 5051*207e5cccSFangrui Song void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) { 5052*207e5cccSFangrui Song vst4q_lane_s8(a, b, 15); 5053*207e5cccSFangrui Song } 5054*207e5cccSFangrui Song 5055*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 5056*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 5057*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 5058*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0 5059*207e5cccSFangrui Song // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5060*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5061*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 5062*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 5063*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 5064*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 5065*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 5066*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 5067*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 5068*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 
5069*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 5070*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 5071*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 5072*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5073*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 5074*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 5075*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 5076*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 5077*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5078*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5079*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5080*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 5081*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a) 5082*207e5cccSFangrui Song // CHECK: ret void 5083*207e5cccSFangrui Song void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) { 5084*207e5cccSFangrui Song vst4q_lane_s16(a, b, 7); 5085*207e5cccSFangrui Song } 5086*207e5cccSFangrui Song 5087*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 { 5088*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 5089*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 
5090*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0 5091*207e5cccSFangrui Song // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5092*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5093*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 5094*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 5095*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 5096*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 5097*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 5098*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 5099*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 5100*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 5101*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 5102*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 5103*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 5104*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 5105*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0 5106*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 5107*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <4 x i32>, 
ptr [[ARRAYIDX6]], align 16 5108*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 5109*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 5110*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 5111*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 5112*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 5113*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, ptr %a) 5114*207e5cccSFangrui Song // CHECK: ret void 5115*207e5cccSFangrui Song void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) { 5116*207e5cccSFangrui Song vst4q_lane_s32(a, b, 3); 5117*207e5cccSFangrui Song } 5118*207e5cccSFangrui Song 5119*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 5120*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16 5121*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16 5122*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0 5123*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5124*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5125*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 5126*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 5127*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 5128*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast 
<2 x i64> [[TMP3]] to <16 x i8> 5129*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 5130*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 5131*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 5132*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5133*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 5134*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 5135*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 5136*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 5137*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0 5138*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 5139*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 5140*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 5141*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5142*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5143*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 5144*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 5145*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a) 5146*207e5cccSFangrui Song // CHECK: ret void 5147*207e5cccSFangrui Song void 
test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) { 5148*207e5cccSFangrui Song vst4q_lane_s64(a, b, 1); 5149*207e5cccSFangrui Song } 5150*207e5cccSFangrui Song 5151*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 { 5152*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 5153*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 5154*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0 5155*207e5cccSFangrui Song // CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5156*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5157*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 5158*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0 5159*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 5160*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 5161*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 5162*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 5163*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 5164*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 5165*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 5166*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 
5167*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 5168*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 5169*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0 5170*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3 5171*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16 5172*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> 5173*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> 5174*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> 5175*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half> 5176*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half> 5177*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], i64 7, ptr %a) 5178*207e5cccSFangrui Song // CHECK: ret void 5179*207e5cccSFangrui Song void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) { 5180*207e5cccSFangrui Song vst4q_lane_f16(a, b, 7); 5181*207e5cccSFangrui Song } 5182*207e5cccSFangrui Song 5183*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 { 5184*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 5185*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 5186*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0 5187*207e5cccSFangrui Song // CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 
16 5188*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5189*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 5190*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0 5191*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 5192*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 5193*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 5194*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 5195*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 5196*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 5197*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 5198*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 5199*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 5200*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 5201*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0 5202*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3 5203*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16 5204*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> 5205*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] 
to <4 x float> 5206*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 5207*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 5208*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> 5209*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i64 3, ptr %a) 5210*207e5cccSFangrui Song // CHECK: ret void 5211*207e5cccSFangrui Song void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) { 5212*207e5cccSFangrui Song vst4q_lane_f32(a, b, 3); 5213*207e5cccSFangrui Song } 5214*207e5cccSFangrui Song 5215*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 { 5216*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 5217*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 5218*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0 5219*207e5cccSFangrui Song // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5220*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5221*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 5222*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0 5223*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 5224*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 5225*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 
0 5226*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 5227*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 5228*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 5229*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 5230*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 5231*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 5232*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 5233*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 5234*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3 5235*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16 5236*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> 5237*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 5238*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 5239*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 5240*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> 5241*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i64 1, ptr %a) 5242*207e5cccSFangrui Song // CHECK: ret void 5243*207e5cccSFangrui Song void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) { 5244*207e5cccSFangrui Song vst4q_lane_f64(a, b, 1); 
5245*207e5cccSFangrui Song } 5246*207e5cccSFangrui Song 5247*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 { 5248*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 5249*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 5250*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0 5251*207e5cccSFangrui Song // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5252*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5253*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 5254*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 5255*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 5256*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 5257*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 5258*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 5259*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 5260*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 5261*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 5262*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0 5263*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 
x i8>], ptr [[VAL5]], i64 0, i64 3 5264*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 5265*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, ptr %a) 5266*207e5cccSFangrui Song // CHECK: ret void 5267*207e5cccSFangrui Song void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) { 5268*207e5cccSFangrui Song vst4q_lane_p8(a, b, 15); 5269*207e5cccSFangrui Song } 5270*207e5cccSFangrui Song 5271*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 { 5272*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 5273*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 5274*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0 5275*207e5cccSFangrui Song // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5276*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5277*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 5278*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 5279*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 5280*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 5281*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 5282*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 5283*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr 
[[ARRAYIDX2]], align 16 5284*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 5285*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 5286*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 5287*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 5288*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 5289*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0 5290*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 5291*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 5292*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 5293*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 5294*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 5295*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 5296*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 5297*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, ptr %a) 5298*207e5cccSFangrui Song // CHECK: ret void 5299*207e5cccSFangrui Song void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) { 5300*207e5cccSFangrui Song vst4q_lane_p16(a, b, 7); 5301*207e5cccSFangrui Song } 5302*207e5cccSFangrui Song 5303*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 { 5304*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca 
%struct.poly64x2x4_t, align 16 5305*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 5306*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0 5307*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 5308*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 5309*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 5310*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 5311*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 5312*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 5313*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 5314*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 5315*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 5316*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 5317*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 5318*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 5319*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 5320*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 5321*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 5322*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 5323*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 5324*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 5325*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 5326*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 5327*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 5328*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 5329*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, ptr %a) 5330*207e5cccSFangrui Song // CHECK: ret void 5331*207e5cccSFangrui Song void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) { 5332*207e5cccSFangrui Song vst4q_lane_p64(a, b, 1); 5333*207e5cccSFangrui Song } 5334*207e5cccSFangrui Song 5335*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 { 5336*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 5337*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 5338*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0 5339*207e5cccSFangrui Song // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5340*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5341*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 5342*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 5343*207e5cccSFangrui Song // 
CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 5344*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 5345*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 5346*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 5347*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 5348*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 5349*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 5350*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0 5351*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 5352*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 5353*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a) 5354*207e5cccSFangrui Song // CHECK: ret void 5355*207e5cccSFangrui Song void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) { 5356*207e5cccSFangrui Song vst4_lane_u8(a, b, 7); 5357*207e5cccSFangrui Song } 5358*207e5cccSFangrui Song 5359*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 { 5360*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 5361*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 5362*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0 5363*207e5cccSFangrui Song // CHECK: store [4 x <4 
x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5364*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5365*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 5366*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 5367*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 5368*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 5369*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 5370*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 5371*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 5372*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5373*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 5374*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 5375*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 5376*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 5377*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0 5378*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 5379*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 5380*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> 5381*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> 
[[TMP4]] to <4 x i16> 5382*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5383*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5384*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 5385*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a) 5386*207e5cccSFangrui Song // CHECK: ret void 5387*207e5cccSFangrui Song void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) { 5388*207e5cccSFangrui Song vst4_lane_u16(a, b, 3); 5389*207e5cccSFangrui Song } 5390*207e5cccSFangrui Song 5391*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 { 5392*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 5393*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 5394*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0 5395*207e5cccSFangrui Song // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5396*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5397*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 5398*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 5399*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 5400*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 5401*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 5402*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 5403*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 5404*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 5405*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 5406*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 5407*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 5408*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 5409*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0 5410*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 5411*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 5412*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 5413*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 5414*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 5415*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 5416*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 5417*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, ptr %a) 5418*207e5cccSFangrui Song // CHECK: ret void 5419*207e5cccSFangrui Song void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) { 5420*207e5cccSFangrui Song vst4_lane_u32(a, b, 1); 5421*207e5cccSFangrui Song } 5422*207e5cccSFangrui Song 5423*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void 
@test_vst4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 { 5424*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 5425*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 5426*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0 5427*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5428*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5429*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 5430*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 5431*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 5432*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 5433*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 5434*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 5435*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 5436*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 5437*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 5438*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 5439*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 5440*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 5441*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr 
inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0 5442*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 5443*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 5444*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 5445*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 5446*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 5447*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 5448*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 5449*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a) 5450*207e5cccSFangrui Song // CHECK: ret void 5451*207e5cccSFangrui Song void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) { 5452*207e5cccSFangrui Song vst4_lane_u64(a, b, 0); 5453*207e5cccSFangrui Song } 5454*207e5cccSFangrui Song 5455*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 { 5456*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 5457*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 5458*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0 5459*207e5cccSFangrui Song // CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5460*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5461*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 5462*207e5cccSFangrui Song // CHECK: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 5463*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 5464*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 5465*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 5466*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 5467*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 5468*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 5469*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 5470*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0 5471*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3 5472*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8 5473*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a) 5474*207e5cccSFangrui Song // CHECK: ret void 5475*207e5cccSFangrui Song void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) { 5476*207e5cccSFangrui Song vst4_lane_s8(a, b, 7); 5477*207e5cccSFangrui Song } 5478*207e5cccSFangrui Song 5479*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 { 5480*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 5481*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 5482*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0 5483*207e5cccSFangrui Song // CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5484*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5485*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 5486*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 5487*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 5488*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 5489*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 5490*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 5491*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 5492*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 5493*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 5494*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 5495*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 5496*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 5497*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0 5498*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3 5499*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8 5500*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = 
bitcast <4 x i16> [[TMP9]] to <8 x i8> 5501*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 5502*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 5503*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 5504*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> 5505*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a) 5506*207e5cccSFangrui Song // CHECK: ret void 5507*207e5cccSFangrui Song void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) { 5508*207e5cccSFangrui Song vst4_lane_s16(a, b, 3); 5509*207e5cccSFangrui Song } 5510*207e5cccSFangrui Song 5511*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 { 5512*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 5513*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 5514*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0 5515*207e5cccSFangrui Song // CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5516*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5517*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 5518*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 5519*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 5520*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 5521*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr 
inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 5522*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 5523*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 5524*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 5525*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 5526*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 5527*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 5528*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 5529*207e5cccSFangrui Song // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0 5530*207e5cccSFangrui Song // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3 5531*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8 5532*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> 5533*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 5534*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 5535*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 5536*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> 5537*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, ptr %a) 5538*207e5cccSFangrui Song // CHECK: ret void 5539*207e5cccSFangrui Song void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) { 5540*207e5cccSFangrui Song vst4_lane_s32(a, b, 1); 5541*207e5cccSFangrui 
Song } 5542*207e5cccSFangrui Song 5543*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 { 5544*207e5cccSFangrui Song // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 5545*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 5546*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0 5547*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 5548*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 5549*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 5550*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 5551*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 5552*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 5553*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 5554*207e5cccSFangrui Song // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 5555*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 5556*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 5557*207e5cccSFangrui Song // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0 5558*207e5cccSFangrui Song // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 5559*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 5560*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> 
[[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a)
// CHECK: ret void
void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) {
  vst4_lane_s64(a, b, 0);
}

// vst4_lane_f16 with lane index 3 on a float16x4x4_t.
// The expectations below require the coerced argument to be stored to an
// alloca, copied (32-byte memcpy) into a second alloca, each of the four
// <4 x half> members to be loaded and round-tripped through <8 x i8>, and the
// results passed to @llvm.aarch64.neon.st4lane.v4f16.p0 with lane operand
// i64 3 and the destination pointer %a.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK: call void @llvm.aarch64.neon.st4lane.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], i64 3, ptr %a)
// CHECK: ret void
void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
}

// vst4_lane_f32 with lane index 1 on a float32x2x4_t.
// Same expected shape as the other bitcasting variants in this group: four
// <2 x float> members are loaded from the local copy, round-tripped through
// <8 x i8>, and passed to @llvm.aarch64.neon.st4lane.v2f32.p0 with lane
// operand i64 1.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i64 1, ptr %a)
// CHECK: ret void
void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) {
  vst4_lane_f32(a, b, 1);
}

// vst4_lane_f64 with lane index 0 on a float64x1x4_t.
// Four <1 x double> members are loaded from the local copy, round-tripped
// through <8 x i8>, and passed to @llvm.aarch64.neon.st4lane.v1f64.p0 with
// lane operand i64 0.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i64 0, ptr %a)
// CHECK: ret void
void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) {
  vst4_lane_f64(a, b, 0);
}

// vst4_lane_p8 with lane index 7 on a poly8x8x4_t.
// Unlike the wider-element variants in this group, the expectations contain
// no bitcasts: the four <8 x i8> members are loaded from the local copy and
// passed directly to @llvm.aarch64.neon.st4lane.v8i8.p0 with lane operand
// i64 7.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, ptr %a)
// CHECK: ret void
void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_lane_p8(a, b, 7);
}

// vst4_lane_p16 with lane index 3 on a poly16x4x4_t.
// Four <4 x i16> members are loaded from the local copy, round-tripped
// through <8 x i8>, and passed to @llvm.aarch64.neon.st4lane.v4i16.p0 with
// lane operand i64 3.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, ptr %a)
// CHECK: ret void
void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_lane_p16(a, b, 3);
}

// vst4_lane_p64 with lane index 0 on a poly64x1x4_t.
// Four <1 x i64> members are loaded from the local copy, round-tripped
// through <8 x i8>, and passed to @llvm.aarch64.neon.st4lane.v1i64.p0 with
// lane operand i64 0.
// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, ptr %a)
// CHECK: ret void
void test_vst4_lane_p64(poly64_t *a, poly64x1x4_t b) {
  vst4_lane_p64(a, b, 0);
}