1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 2*207e5cccSFangrui Song // RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \ 3*207e5cccSFangrui Song // RUN: | FileCheck %s 4*207e5cccSFangrui Song 5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target || arm-registered-target 6*207e5cccSFangrui Song 7*207e5cccSFangrui Song #include <arm_neon.h> 8*207e5cccSFangrui Song 9*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vceq_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { 10*207e5cccSFangrui Song // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b 11*207e5cccSFangrui Song // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 12*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[SEXT_I]] 13*207e5cccSFangrui Song uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) { 14*207e5cccSFangrui Song return vceq_p64(a, b); 15*207e5cccSFangrui Song } 16*207e5cccSFangrui Song 17*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vceqq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 18*207e5cccSFangrui Song // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b 19*207e5cccSFangrui Song // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 20*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SEXT_I]] 21*207e5cccSFangrui Song uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) { 22*207e5cccSFangrui Song return vceqq_p64(a, b); 23*207e5cccSFangrui Song } 24*207e5cccSFangrui Song 25*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vtst_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { 26*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b 27*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer 28*207e5cccSFangrui Song // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64> 29*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VTST_I]] 30*207e5cccSFangrui Song uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) { 31*207e5cccSFangrui Song return vtst_p64(a, b); 32*207e5cccSFangrui Song } 33*207e5cccSFangrui Song 34*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtstq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 35*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b 36*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer 37*207e5cccSFangrui Song // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> 38*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VTST_I]] 39*207e5cccSFangrui Song uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) { 40*207e5cccSFangrui Song return vtstq_p64(a, b); 41*207e5cccSFangrui Song } 42*207e5cccSFangrui Song 43*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 { 44*207e5cccSFangrui Song // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b 45*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1) 46*207e5cccSFangrui Song // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c 47*207e5cccSFangrui Song // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 48*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VBSL5_I]] 49*207e5cccSFangrui Song poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) { 50*207e5cccSFangrui Song return vbsl_p64(a, b, c); 51*207e5cccSFangrui Song } 52*207e5cccSFangrui Song 53*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 { 54*207e5cccSFangrui Song // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b 55*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1) 56*207e5cccSFangrui Song // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c 57*207e5cccSFangrui Song // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 58*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VBSL5_I]] 59*207e5cccSFangrui Song poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) { 60*207e5cccSFangrui Song return vbslq_p64(a, b, c); 61*207e5cccSFangrui Song } 62*207e5cccSFangrui Song 63*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} i64 @test_vget_lane_p64(<1 x i64> noundef %v) #0 { 64*207e5cccSFangrui Song // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0 65*207e5cccSFangrui Song // CHECK: ret i64 [[VGET_LANE]] 66*207e5cccSFangrui Song poly64_t test_vget_lane_p64(poly64x1_t v) { 67*207e5cccSFangrui Song return vget_lane_p64(v, 0); 68*207e5cccSFangrui Song } 69*207e5cccSFangrui Song 70*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} i64 @test_vgetq_lane_p64(<2 x i64> noundef %v) #0 { 71*207e5cccSFangrui Song // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1 72*207e5cccSFangrui Song // CHECK: ret i64 [[VGETQ_LANE]] 73*207e5cccSFangrui Song poly64_t test_vgetq_lane_p64(poly64x2_t v) { 74*207e5cccSFangrui Song return vgetq_lane_p64(v, 1); 75*207e5cccSFangrui Song } 76*207e5cccSFangrui Song 77*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_p64(i64 noundef %a, <1 x i64> noundef %v) #0 { 78*207e5cccSFangrui Song // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0 79*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VSET_LANE]] 80*207e5cccSFangrui Song poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) { 81*207e5cccSFangrui Song return vset_lane_p64(a, v, 0); 82*207e5cccSFangrui Song } 83*207e5cccSFangrui Song 84*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_p64(i64 noundef %a, <2 x i64> noundef %v) #0 { 85*207e5cccSFangrui Song // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1 86*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VSET_LANE]] 87*207e5cccSFangrui Song poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) { 88*207e5cccSFangrui Song return vsetq_lane_p64(a, v, 1); 89*207e5cccSFangrui Song } 90*207e5cccSFangrui Song 91*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcopy_lane_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { 92*207e5cccSFangrui Song // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0 93*207e5cccSFangrui Song // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0 94*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VSET_LANE]] 95*207e5cccSFangrui Song poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) { 96*207e5cccSFangrui Song return vcopy_lane_p64(a, 0, b, 0); 97*207e5cccSFangrui Song 98*207e5cccSFangrui Song } 99*207e5cccSFangrui Song 100*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_lane_p64(<2 x i64> noundef %a, <1 x i64> noundef %b) #0 { 101*207e5cccSFangrui Song // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0 102*207e5cccSFangrui Song // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1 103*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VSET_LANE]] 104*207e5cccSFangrui Song poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) { 105*207e5cccSFangrui Song return vcopyq_lane_p64(a, 1, b, 0); 106*207e5cccSFangrui Song } 107*207e5cccSFangrui Song 108*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 109*207e5cccSFangrui Song // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1 110*207e5cccSFangrui Song // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1 111*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VSET_LANE]] 112*207e5cccSFangrui Song poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) { 113*207e5cccSFangrui Song return vcopyq_laneq_p64(a, 1, b, 1); 114*207e5cccSFangrui Song } 115*207e5cccSFangrui Song 116*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcreate_p64(i64 noundef %a) #0 { 117*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> 118*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[TMP0]] 119*207e5cccSFangrui Song poly64x1_t test_vcreate_p64(uint64_t a) { 120*207e5cccSFangrui Song return vcreate_p64(a); 121*207e5cccSFangrui Song } 122*207e5cccSFangrui Song 123*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_n_p64(i64 noundef %a) #0 { 124*207e5cccSFangrui Song // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0 125*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VECINIT_I]] 126*207e5cccSFangrui Song poly64x1_t test_vdup_n_p64(poly64_t a) { 127*207e5cccSFangrui Song return vdup_n_p64(a); 128*207e5cccSFangrui Song } 129*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_n_p64(i64 noundef %a) #0 { 130*207e5cccSFangrui Song // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0 131*207e5cccSFangrui Song // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 132*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VECINIT1_I]] 133*207e5cccSFangrui Song poly64x2_t test_vdupq_n_p64(poly64_t a) { 134*207e5cccSFangrui Song return vdupq_n_p64(a); 135*207e5cccSFangrui Song } 136*207e5cccSFangrui Song 137*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vmov_n_p64(i64 noundef %a) #0 { 138*207e5cccSFangrui Song // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0 139*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VECINIT_I]] 140*207e5cccSFangrui Song poly64x1_t test_vmov_n_p64(poly64_t a) { 141*207e5cccSFangrui Song return vmov_n_p64(a); 142*207e5cccSFangrui Song } 143*207e5cccSFangrui Song 144*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vmovq_n_p64(i64 noundef %a) #0 { 145*207e5cccSFangrui Song // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0 146*207e5cccSFangrui Song // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 147*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VECINIT1_I]] 148*207e5cccSFangrui Song poly64x2_t test_vmovq_n_p64(poly64_t a) { 149*207e5cccSFangrui Song return vmovq_n_p64(a); 150*207e5cccSFangrui Song } 151*207e5cccSFangrui Song 152*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_lane_p64(<1 x i64> noundef %vec) #0 { 153*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8> 154*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 155*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer 156*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[LANE]] 157*207e5cccSFangrui Song poly64x1_t test_vdup_lane_p64(poly64x1_t vec) { 158*207e5cccSFangrui Song return vdup_lane_p64(vec, 0); 159*207e5cccSFangrui Song } 160*207e5cccSFangrui Song 161*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_lane_p64(<1 x i64> noundef %vec) #0 { 162*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8> 163*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 164*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer 165*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[LANE]] 166*207e5cccSFangrui Song poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) { 167*207e5cccSFangrui Song return vdupq_lane_p64(vec, 0); 168*207e5cccSFangrui Song } 169*207e5cccSFangrui Song 170*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_laneq_p64(<2 x i64> noundef %vec) #0 { 171*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8> 172*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 173*207e5cccSFangrui Song // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1> 174*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[LANE]] 175*207e5cccSFangrui Song poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) { 176*207e5cccSFangrui Song return vdupq_laneq_p64(vec, 1); 177*207e5cccSFangrui Song } 178*207e5cccSFangrui Song 179*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcombine_p64(<1 x i64> noundef %low, <1 x i64> noundef %high) #0 { 180*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1> 181*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 182*207e5cccSFangrui Song poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) { 183*207e5cccSFangrui Song return vcombine_p64(low, high); 184*207e5cccSFangrui Song } 185*207e5cccSFangrui Song 186*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_p64(ptr noundef %ptr) #0 { 187*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %ptr 188*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[TMP2]] 189*207e5cccSFangrui Song poly64x1_t test_vld1_p64(poly64_t const * ptr) { 190*207e5cccSFangrui Song return vld1_p64(ptr); 191*207e5cccSFangrui Song } 192*207e5cccSFangrui Song 193*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_p64(ptr noundef %ptr) #0 { 194*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %ptr 195*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[TMP2]] 196*207e5cccSFangrui Song poly64x2_t test_vld1q_p64(poly64_t const * ptr) { 197*207e5cccSFangrui Song return vld1q_p64(ptr); 198*207e5cccSFangrui Song } 199*207e5cccSFangrui Song 200*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_p64(ptr noundef %ptr, <1 x i64> noundef %val) #0 { 201*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8> 202*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 203*207e5cccSFangrui Song // CHECK: store <1 x i64> [[TMP3]], ptr %ptr 204*207e5cccSFangrui Song // CHECK: ret void 205*207e5cccSFangrui Song void test_vst1_p64(poly64_t * ptr, poly64x1_t val) { 206*207e5cccSFangrui Song return vst1_p64(ptr, val); 207*207e5cccSFangrui Song } 208*207e5cccSFangrui Song 209*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_p64(ptr noundef %ptr, <2 x i64> noundef %val) #0 { 210*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8> 211*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 212*207e5cccSFangrui Song // CHECK: store <2 x i64> [[TMP3]], ptr %ptr 213*207e5cccSFangrui Song // CHECK: ret void 214*207e5cccSFangrui Song void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) { 215*207e5cccSFangrui Song return vst1q_p64(ptr, val); 216*207e5cccSFangrui Song } 217*207e5cccSFangrui Song 218*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_p64(ptr noundef %ptr) #0 { 219*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 220*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8 221*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %ptr) 222*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] 223*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 224*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8 225*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x2_t [[TMP6]] 226*207e5cccSFangrui Song poly64x1x2_t test_vld2_p64(poly64_t const * ptr) { 227*207e5cccSFangrui Song return vld2_p64(ptr); 228*207e5cccSFangrui Song } 229*207e5cccSFangrui Song 230*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_p64(ptr noundef %ptr) #0 { 231*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 232*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16 233*207e5cccSFangrui Song // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %ptr) 234*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 235*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 236*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16 237*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x2_t [[TMP6]] 238*207e5cccSFangrui Song poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) { 239*207e5cccSFangrui Song return vld2q_p64(ptr); 240*207e5cccSFangrui Song } 241*207e5cccSFangrui Song 242*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_p64(ptr noundef %ptr) #0 { 243*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 244*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8 245*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %ptr) 246*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] 247*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 248*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8 249*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x3_t [[TMP6]] 250*207e5cccSFangrui Song poly64x1x3_t test_vld3_p64(poly64_t const * ptr) { 251*207e5cccSFangrui Song return vld3_p64(ptr); 252*207e5cccSFangrui Song } 253*207e5cccSFangrui Song 254*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_p64(ptr noundef %ptr) #0 { 255*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 256*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16 257*207e5cccSFangrui Song // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %ptr) 258*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 259*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 260*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16 261*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x3_t [[TMP6]] 262*207e5cccSFangrui Song poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) { 263*207e5cccSFangrui Song return vld3q_p64(ptr); 264*207e5cccSFangrui Song } 265*207e5cccSFangrui Song 266*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_p64(ptr noundef %ptr) #0 { 267*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 268*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8 269*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %ptr) 270*207e5cccSFangrui Song // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]] 271*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false) 272*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8 273*207e5cccSFangrui Song // CHECK: ret %struct.poly64x1x4_t [[TMP6]] 274*207e5cccSFangrui Song poly64x1x4_t test_vld4_p64(poly64_t const * ptr) { 275*207e5cccSFangrui Song return vld4_p64(ptr); 276*207e5cccSFangrui Song } 277*207e5cccSFangrui Song 278*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_p64(ptr noundef %ptr) #0 { 279*207e5cccSFangrui Song // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 280*207e5cccSFangrui Song // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16 281*207e5cccSFangrui Song // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %ptr) 282*207e5cccSFangrui Song // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] 283*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 284*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16 285*207e5cccSFangrui Song // CHECK: ret %struct.poly64x2x4_t [[TMP6]] 286*207e5cccSFangrui Song poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) { 287*207e5cccSFangrui Song return vld4q_p64(ptr); 288*207e5cccSFangrui Song } 289*207e5cccSFangrui Song 290*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_p64(ptr noundef %ptr, [2 x <1 x i64>] alignstack(8) %val.coerce) #0 { 291*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8 292*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 293*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[VAL]], i32 0, i32 0 294*207e5cccSFangrui Song // CHECK: store [2 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8 295*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 16, i1 false) 296*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 297*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0 298*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 299*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 300*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 301*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1 302*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8 303*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 304*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 305*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 306*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %ptr) 307*207e5cccSFangrui Song // CHECK: ret void 308*207e5cccSFangrui Song void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) { 309*207e5cccSFangrui Song return vst2_p64(ptr, val); 310*207e5cccSFangrui Song } 311*207e5cccSFangrui Song 312*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_p64(ptr noundef %ptr, [2 x <2 x i64>] alignstack(16) %val.coerce) #0 { 313*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16 314*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16 315*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[VAL]], i32 0, i32 0 316*207e5cccSFangrui Song // CHECK: store [2 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16 317*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 32, i1 false) 318*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 319*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0 320*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 321*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 322*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0 323*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1 324*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16 325*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 326*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 327*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 328*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %ptr) 329*207e5cccSFangrui Song // CHECK: ret void 330*207e5cccSFangrui Song void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) { 331*207e5cccSFangrui Song return vst2q_p64(ptr, val); 332*207e5cccSFangrui Song } 333*207e5cccSFangrui Song 334*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_p64(ptr noundef %ptr, [3 x <1 x i64>] alignstack(8) %val.coerce) #0 { 335*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8 336*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 337*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[VAL]], i32 0, i32 0 338*207e5cccSFangrui Song // CHECK: store [3 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8 339*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 24, i1 false) 340*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 341*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0 342*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 343*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 344*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 345*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1 346*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8 347*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 348*207e5cccSFangrui Song // CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 349*207e5cccSFangrui Song // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2 350*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8 351*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 352*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 353*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 354*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 355*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %ptr) 356*207e5cccSFangrui Song // CHECK: ret void 357*207e5cccSFangrui Song void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) { 358*207e5cccSFangrui Song return vst3_p64(ptr, val); 359*207e5cccSFangrui Song } 360*207e5cccSFangrui Song 361*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_p64(ptr noundef %ptr, [3 x <2 x i64>] alignstack(16) %val.coerce) #0 { 362*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16 363*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 364*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[VAL]], i32 0, i32 0 365*207e5cccSFangrui Song // CHECK: store [3 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16 366*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 48, i1 false) 367*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 368*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0 369*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 370*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 371*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 372*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1 373*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16 374*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 375*207e5cccSFangrui Song // CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 376*207e5cccSFangrui Song // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2 377*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16 378*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 379*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 380*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 381*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 382*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %ptr) 383*207e5cccSFangrui Song // CHECK: ret void 384*207e5cccSFangrui Song void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) { 385*207e5cccSFangrui Song return vst3q_p64(ptr, val); 386*207e5cccSFangrui Song } 387*207e5cccSFangrui Song 388*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_p64(ptr noundef %ptr, [4 x <1 x i64>] alignstack(8) %val.coerce) #0 { 389*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8 390*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 391*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[VAL]], i32 0, i32 0 392*207e5cccSFangrui Song // CHECK: store [4 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8 393*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 32, i1 false) 394*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 395*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0 396*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 397*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 398*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 399*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1 400*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8 401*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 402*207e5cccSFangrui Song // CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 403*207e5cccSFangrui Song // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2 404*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8 405*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 406*207e5cccSFangrui Song // CHECK: [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 407*207e5cccSFangrui Song // CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL6]], i64 0, i64 3 408*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX7]], align 8 409*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 410*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 411*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 412*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 413*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 414*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %ptr) 415*207e5cccSFangrui Song // CHECK: ret void 416*207e5cccSFangrui Song void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) { 417*207e5cccSFangrui Song return vst4_p64(ptr, val); 418*207e5cccSFangrui Song } 419*207e5cccSFangrui Song 420*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_p64(ptr noundef %ptr, [4 x <2 x i64>] alignstack(16) %val.coerce) #0 { 421*207e5cccSFangrui Song // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16 422*207e5cccSFangrui Song // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 423*207e5cccSFangrui Song // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[VAL]], i32 0, i32 0 424*207e5cccSFangrui Song // CHECK: store [4 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16 425*207e5cccSFangrui Song // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 64, i1 false) 426*207e5cccSFangrui Song // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 427*207e5cccSFangrui Song // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0 428*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 429*207e5cccSFangrui Song // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 430*207e5cccSFangrui Song // CHECK: [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 431*207e5cccSFangrui Song // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1 432*207e5cccSFangrui Song // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16 433*207e5cccSFangrui Song // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 434*207e5cccSFangrui Song // CHECK: [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 435*207e5cccSFangrui Song // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2 436*207e5cccSFangrui Song // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16 437*207e5cccSFangrui Song // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 438*207e5cccSFangrui Song // CHECK: [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 439*207e5cccSFangrui Song // CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL6]], i64 0, i64 3 440*207e5cccSFangrui Song // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX7]], align 16 441*207e5cccSFangrui Song // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 442*207e5cccSFangrui Song // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 443*207e5cccSFangrui Song // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 444*207e5cccSFangrui Song // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 445*207e5cccSFangrui Song // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 446*207e5cccSFangrui Song // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %ptr) 447*207e5cccSFangrui Song // CHECK: ret void 448*207e5cccSFangrui Song void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) { 449*207e5cccSFangrui Song return vst4q_p64(ptr, val); 450*207e5cccSFangrui Song } 451*207e5cccSFangrui Song 452*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { 453*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 454*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 455*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 456*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 457*207e5cccSFangrui Song // CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer 458*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VEXT]] 459*207e5cccSFangrui Song poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) { 460*207e5cccSFangrui Song return vext_u64(a, b, 0); 461*207e5cccSFangrui Song 462*207e5cccSFangrui Song } 463*207e5cccSFangrui Song 464*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 465*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 466*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 467*207e5cccSFangrui Song // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 468*207e5cccSFangrui Song // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 469*207e5cccSFangrui Song // CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2> 470*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VEXT]] 471*207e5cccSFangrui Song poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) { 472*207e5cccSFangrui Song return vextq_p64(a, b, 1); 473*207e5cccSFangrui Song } 474*207e5cccSFangrui Song 475*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 476*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> 477*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 478*207e5cccSFangrui Song poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) { 479*207e5cccSFangrui Song return vzip1q_p64(a, b); 480*207e5cccSFangrui Song } 481*207e5cccSFangrui Song 482*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 483*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> 484*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 485*207e5cccSFangrui Song poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) { 486*207e5cccSFangrui Song return vzip2q_u64(a, b); 487*207e5cccSFangrui Song } 488*207e5cccSFangrui Song 489*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 490*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> 491*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 492*207e5cccSFangrui Song poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) { 493*207e5cccSFangrui Song return vuzp1q_p64(a, b); 494*207e5cccSFangrui Song } 495*207e5cccSFangrui Song 496*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 497*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> 498*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 499*207e5cccSFangrui Song poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) { 500*207e5cccSFangrui Song return vuzp2q_u64(a, b); 501*207e5cccSFangrui Song } 502*207e5cccSFangrui Song 503*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 504*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> 505*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 506*207e5cccSFangrui Song poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) { 507*207e5cccSFangrui Song return vtrn1q_p64(a, b); 508*207e5cccSFangrui Song } 509*207e5cccSFangrui Song 510*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 511*207e5cccSFangrui Song // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3> 512*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[SHUFFLE_I]] 513*207e5cccSFangrui Song poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) { 514*207e5cccSFangrui Song return vtrn2q_u64(a, b); 515*207e5cccSFangrui Song } 516*207e5cccSFangrui Song 517*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vsri_n_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 { 518*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 519*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 520*207e5cccSFangrui Song // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 521*207e5cccSFangrui Song // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 522*207e5cccSFangrui Song // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33) 523*207e5cccSFangrui Song // CHECK: ret <1 x i64> [[VSRI_N2]] 524*207e5cccSFangrui Song poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) { 525*207e5cccSFangrui Song return vsri_n_p64(a, b, 33); 526*207e5cccSFangrui Song } 527*207e5cccSFangrui Song 528*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsriq_n_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 { 529*207e5cccSFangrui Song // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 530*207e5cccSFangrui Song // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 531*207e5cccSFangrui Song // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 532*207e5cccSFangrui Song // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 533*207e5cccSFangrui Song // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64) 534*207e5cccSFangrui Song // CHECK: ret <2 x i64> [[VSRI_N2]] 535*207e5cccSFangrui Song poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) { 536*207e5cccSFangrui Song return vsriq_n_p64(a, b, 64); 537*207e5cccSFangrui Song } 538