xref: /llvm-project/clang/test/CodeGen/AArch64/poly64.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2*207e5cccSFangrui Song // RUN:  -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
3*207e5cccSFangrui Song // RUN:  | FileCheck %s
4*207e5cccSFangrui Song 
5*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target || arm-registered-target
6*207e5cccSFangrui Song 
7*207e5cccSFangrui Song #include <arm_neon.h>
8*207e5cccSFangrui Song 
9*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vceq_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
10*207e5cccSFangrui Song // CHECK:   [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
11*207e5cccSFangrui Song // CHECK:   [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
12*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[SEXT_I]]
// Exercises vceq_p64 on two poly64x1_t operands. The FileCheck directives
// above expect an `icmp eq` on <1 x i64> whose i1 result is sign-extended
// back to <1 x i64> and returned.
13*207e5cccSFangrui Song uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
14*207e5cccSFangrui Song   return vceq_p64(a, b);
15*207e5cccSFangrui Song }
16*207e5cccSFangrui Song 
17*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vceqq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
18*207e5cccSFangrui Song // CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
19*207e5cccSFangrui Song // CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
20*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SEXT_I]]
// Exercises vceqq_p64 on two poly64x2_t operands. The FileCheck directives
// above expect an `icmp eq` on <2 x i64> followed by a sext of the <2 x i1>
// result to <2 x i64>.
21*207e5cccSFangrui Song uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
22*207e5cccSFangrui Song   return vceqq_p64(a, b);
23*207e5cccSFangrui Song }
24*207e5cccSFangrui Song 
25*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vtst_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
26*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = and <1 x i64> %a, %b
27*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
28*207e5cccSFangrui Song // CHECK:   [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
29*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VTST_I]]
// Exercises vtst_p64. Expected IR (per the directives above): `and` of the
// two <1 x i64> arguments, `icmp ne` against zeroinitializer, then a sext of
// the comparison result to <1 x i64>.
30*207e5cccSFangrui Song uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
31*207e5cccSFangrui Song   return vtst_p64(a, b);
32*207e5cccSFangrui Song }
33*207e5cccSFangrui Song 
34*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtstq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
35*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = and <2 x i64> %a, %b
36*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
37*207e5cccSFangrui Song // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
38*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VTST_I]]
// Exercises vtstq_p64. Expected IR (per the directives above): `and` of the
// two <2 x i64> arguments, `icmp ne` against zeroinitializer, then a sext of
// the comparison result to <2 x i64>.
39*207e5cccSFangrui Song uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
40*207e5cccSFangrui Song   return vtstq_p64(a, b);
41*207e5cccSFangrui Song }
42*207e5cccSFangrui Song 
43*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 {
44*207e5cccSFangrui Song // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
45*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1)
46*207e5cccSFangrui Song // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
47*207e5cccSFangrui Song // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
48*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VBSL5_I]]
// Exercises vbsl_p64 with `a` as the first (mask) operand. The directives
// above expect the open-coded form (a & b) | (~a & c) on <1 x i64>, where ~a
// is emitted as an xor with splat (i64 -1).
49*207e5cccSFangrui Song poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
50*207e5cccSFangrui Song   return vbsl_p64(a, b, c);
51*207e5cccSFangrui Song }
52*207e5cccSFangrui Song 
53*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 {
54*207e5cccSFangrui Song // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
55*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1)
56*207e5cccSFangrui Song // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
57*207e5cccSFangrui Song // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
58*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VBSL5_I]]
// Exercises vbslq_p64 with `a` as the first (mask) operand. The directives
// above expect the open-coded form (a & b) | (~a & c) on <2 x i64>, where ~a
// is emitted as an xor with splat (i64 -1).
59*207e5cccSFangrui Song poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
60*207e5cccSFangrui Song   return vbslq_p64(a, b, c);
61*207e5cccSFangrui Song }
62*207e5cccSFangrui Song 
63*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} i64 @test_vget_lane_p64(<1 x i64> noundef %v) #0 {
64*207e5cccSFangrui Song // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
65*207e5cccSFangrui Song // CHECK:   ret i64 [[VGET_LANE]]
// Exercises vget_lane_p64 with constant lane 0. The directives above expect
// a single `extractelement <1 x i64> %v, i32 0` whose result is returned.
66*207e5cccSFangrui Song poly64_t test_vget_lane_p64(poly64x1_t v) {
67*207e5cccSFangrui Song   return vget_lane_p64(v, 0);
68*207e5cccSFangrui Song }
69*207e5cccSFangrui Song 
70*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} i64 @test_vgetq_lane_p64(<2 x i64> noundef %v) #0 {
71*207e5cccSFangrui Song // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
72*207e5cccSFangrui Song // CHECK:   ret i64 [[VGETQ_LANE]]
// Exercises vgetq_lane_p64 with constant lane 1. The directives above expect
// a single `extractelement <2 x i64> %v, i32 1` whose result is returned.
73*207e5cccSFangrui Song poly64_t test_vgetq_lane_p64(poly64x2_t v) {
74*207e5cccSFangrui Song   return vgetq_lane_p64(v, 1);
75*207e5cccSFangrui Song }
76*207e5cccSFangrui Song 
77*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_p64(i64 noundef %a, <1 x i64> noundef %v) #0 {
78*207e5cccSFangrui Song // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
79*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VSET_LANE]]
// Exercises vset_lane_p64 writing scalar `a` into lane 0 of `v`. The
// directives above expect `insertelement <1 x i64> %v, i64 %a, i32 0`.
80*207e5cccSFangrui Song poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
81*207e5cccSFangrui Song   return vset_lane_p64(a, v, 0);
82*207e5cccSFangrui Song }
83*207e5cccSFangrui Song 
84*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_p64(i64 noundef %a, <2 x i64> noundef %v) #0 {
85*207e5cccSFangrui Song // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
86*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VSET_LANE]]
// Exercises vsetq_lane_p64 writing scalar `a` into lane 1 of `v`. The
// directives above expect `insertelement <2 x i64> %v, i64 %a, i32 1`.
87*207e5cccSFangrui Song poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
88*207e5cccSFangrui Song   return vsetq_lane_p64(a, v, 1);
89*207e5cccSFangrui Song }
90*207e5cccSFangrui Song 
91*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcopy_lane_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
92*207e5cccSFangrui Song // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
93*207e5cccSFangrui Song // CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
94*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VSET_LANE]]
// Exercises vcopy_lane_p64 copying lane 0 of `b` into lane 0 of `a`. The
// directives above expect an extractelement from %b (index 0) feeding an
// insertelement into %a (index 0).
95*207e5cccSFangrui Song poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
96*207e5cccSFangrui Song   return vcopy_lane_p64(a, 0, b, 0);
97*207e5cccSFangrui Song 
98*207e5cccSFangrui Song }
99*207e5cccSFangrui Song 
100*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_lane_p64(<2 x i64> noundef %a, <1 x i64> noundef %b) #0 {
101*207e5cccSFangrui Song // CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
102*207e5cccSFangrui Song // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
103*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VSET_LANE]]
// Exercises vcopyq_lane_p64 copying lane 0 of the one-element vector `b`
// into lane 1 of the two-element vector `a`. The directives above expect an
// extractelement from <1 x i64> %b feeding an insertelement into <2 x i64> %a
// at index 1.
104*207e5cccSFangrui Song poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
105*207e5cccSFangrui Song   return vcopyq_lane_p64(a, 1, b, 0);
106*207e5cccSFangrui Song }
107*207e5cccSFangrui Song 
108*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
109*207e5cccSFangrui Song // CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
110*207e5cccSFangrui Song // CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
111*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VSET_LANE]]
// Exercises vcopyq_laneq_p64 copying lane 1 of `b` into lane 1 of `a`. The
// directives above expect an extractelement from <2 x i64> %b (index 1)
// feeding an insertelement into <2 x i64> %a (index 1).
112*207e5cccSFangrui Song poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
113*207e5cccSFangrui Song   return vcopyq_laneq_p64(a, 1, b, 1);
114*207e5cccSFangrui Song }
115*207e5cccSFangrui Song 
116*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcreate_p64(i64 noundef %a) #0 {
117*207e5cccSFangrui Song // CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
118*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[TMP0]]
// Exercises vcreate_p64 on a uint64_t argument. The directives above expect
// a plain `bitcast i64 %a to <1 x i64>` that is returned directly.
119*207e5cccSFangrui Song poly64x1_t test_vcreate_p64(uint64_t a) {
120*207e5cccSFangrui Song   return vcreate_p64(a);
121*207e5cccSFangrui Song }
122*207e5cccSFangrui Song 
123*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_n_p64(i64 noundef %a) #0 {
124*207e5cccSFangrui Song // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
125*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VECINIT_I]]
// Exercises vdup_n_p64. The directives above expect the scalar to be placed
// in lane 0 of a poison <1 x i64> via a single insertelement.
126*207e5cccSFangrui Song poly64x1_t test_vdup_n_p64(poly64_t a) {
127*207e5cccSFangrui Song   return vdup_n_p64(a);
128*207e5cccSFangrui Song }
129*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_n_p64(i64 noundef %a) #0 {
130*207e5cccSFangrui Song // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
131*207e5cccSFangrui Song // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
132*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VECINIT1_I]]
// Exercises vdupq_n_p64. The directives above expect two chained
// insertelement instructions that place %a in lanes 0 and 1 of a <2 x i64>
// starting from poison.
133*207e5cccSFangrui Song poly64x2_t test_vdupq_n_p64(poly64_t a) {
134*207e5cccSFangrui Song   return vdupq_n_p64(a);
135*207e5cccSFangrui Song }
136*207e5cccSFangrui Song 
137*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vmov_n_p64(i64 noundef %a) #0 {
138*207e5cccSFangrui Song // CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> poison, i64 %a, i32 0
139*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VECINIT_I]]
// Exercises vmov_n_p64. The directives above expect a single insertelement
// of %a into lane 0 of a poison <1 x i64>.
140*207e5cccSFangrui Song poly64x1_t test_vmov_n_p64(poly64_t a) {
141*207e5cccSFangrui Song   return vmov_n_p64(a);
142*207e5cccSFangrui Song }
143*207e5cccSFangrui Song 
144*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vmovq_n_p64(i64 noundef %a) #0 {
145*207e5cccSFangrui Song // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 %a, i32 0
146*207e5cccSFangrui Song // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
147*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VECINIT1_I]]
// Exercises vmovq_n_p64. The directives above expect two chained
// insertelement instructions that place %a in lanes 0 and 1 of a <2 x i64>
// starting from poison.
148*207e5cccSFangrui Song poly64x2_t test_vmovq_n_p64(poly64_t a) {
149*207e5cccSFangrui Song   return vmovq_n_p64(a);
150*207e5cccSFangrui Song }
151*207e5cccSFangrui Song 
152*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_lane_p64(<1 x i64> noundef %vec) #0 {
153*207e5cccSFangrui Song // CHECK:    [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
154*207e5cccSFangrui Song // CHECK:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
155*207e5cccSFangrui Song // CHECK:    [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
156*207e5cccSFangrui Song // CHECK:    ret <1 x i64> [[LANE]]
// Exercises vdup_lane_p64 with constant lane 0. The directives above expect
// the argument to round-trip through <8 x i8> bitcasts and then be broadcast
// with a shufflevector using a <1 x i32> zeroinitializer mask.
157*207e5cccSFangrui Song poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
158*207e5cccSFangrui Song   return vdup_lane_p64(vec, 0);
159*207e5cccSFangrui Song }
160*207e5cccSFangrui Song 
161*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_lane_p64(<1 x i64> noundef %vec) #0 {
162*207e5cccSFangrui Song // CHECK:    [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
163*207e5cccSFangrui Song // CHECK:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
164*207e5cccSFangrui Song // CHECK:    [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
165*207e5cccSFangrui Song // CHECK:    ret <2 x i64> [[LANE]]
// Exercises vdupq_lane_p64 with constant lane 0, widening a one-element
// vector to two elements. The directives above expect <8 x i8> bitcasts
// followed by a shufflevector with a <2 x i32> zeroinitializer mask.
166*207e5cccSFangrui Song poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
167*207e5cccSFangrui Song   return vdupq_lane_p64(vec, 0);
168*207e5cccSFangrui Song }
169*207e5cccSFangrui Song 
170*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_laneq_p64(<2 x i64> noundef %vec) #0 {
171*207e5cccSFangrui Song // CHECK:    [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8>
172*207e5cccSFangrui Song // CHECK:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
173*207e5cccSFangrui Song // CHECK:    [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
174*207e5cccSFangrui Song // CHECK:    ret <2 x i64> [[LANE]]
// Exercises vdupq_laneq_p64 with constant lane 1. The directives above
// expect <16 x i8> bitcasts followed by a shufflevector whose mask is
// <i32 1, i32 1>, i.e. lane 1 replicated into both result lanes.
175*207e5cccSFangrui Song poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
176*207e5cccSFangrui Song   return vdupq_laneq_p64(vec, 1);
177*207e5cccSFangrui Song }
178*207e5cccSFangrui Song 
179*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcombine_p64(<1 x i64> noundef %low, <1 x i64> noundef %high) #0 {
180*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
181*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vcombine_p64. The directives above expect one shufflevector of
// %low and %high with mask <i32 0, i32 1>, so %low supplies lane 0 and %high
// supplies lane 1 of the <2 x i64> result.
182*207e5cccSFangrui Song poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
183*207e5cccSFangrui Song   return vcombine_p64(low, high);
184*207e5cccSFangrui Song }
185*207e5cccSFangrui Song 
186*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_p64(ptr noundef %ptr) #0 {
187*207e5cccSFangrui Song // CHECK:   [[TMP2:%.*]] = load <1 x i64>, ptr %ptr
188*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[TMP2]]
// Exercises vld1_p64. The directives above expect an ordinary
// `load <1 x i64>, ptr %ptr` rather than a target intrinsic call.
189*207e5cccSFangrui Song poly64x1_t test_vld1_p64(poly64_t const * ptr) {
190*207e5cccSFangrui Song   return vld1_p64(ptr);
191*207e5cccSFangrui Song }
192*207e5cccSFangrui Song 
193*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_p64(ptr noundef %ptr) #0 {
194*207e5cccSFangrui Song // CHECK:   [[TMP2:%.*]] = load <2 x i64>, ptr %ptr
195*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[TMP2]]
// Exercises vld1q_p64. The directives above expect an ordinary
// `load <2 x i64>, ptr %ptr` rather than a target intrinsic call.
196*207e5cccSFangrui Song poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
197*207e5cccSFangrui Song   return vld1q_p64(ptr);
198*207e5cccSFangrui Song }
199*207e5cccSFangrui Song 
200*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1_p64(ptr noundef %ptr, <1 x i64> noundef %val) #0 {
201*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
202*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
203*207e5cccSFangrui Song // CHECK:   store <1 x i64> [[TMP3]], ptr %ptr
204*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst1_p64. The directives above expect %val to round-trip through
// <8 x i8> bitcasts and then be written with an ordinary
// `store <1 x i64> ..., ptr %ptr`.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
205*207e5cccSFangrui Song void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
206*207e5cccSFangrui Song   return vst1_p64(ptr, val);
207*207e5cccSFangrui Song }
208*207e5cccSFangrui Song 
209*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst1q_p64(ptr noundef %ptr, <2 x i64> noundef %val) #0 {
210*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
211*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
212*207e5cccSFangrui Song // CHECK:   store <2 x i64> [[TMP3]], ptr %ptr
213*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst1q_p64. The directives above expect %val to round-trip
// through <16 x i8> bitcasts and then be written with an ordinary
// `store <2 x i64> ..., ptr %ptr`.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
214*207e5cccSFangrui Song void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
215*207e5cccSFangrui Song   return vst1q_p64(ptr, val);
216*207e5cccSFangrui Song }
217*207e5cccSFangrui Song 
218*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_p64(ptr noundef %ptr) #0 {
219*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
220*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
221*207e5cccSFangrui Song // CHECK:   [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %ptr)
222*207e5cccSFangrui Song // CHECK:   store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]]
223*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
224*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
225*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x1x2_t [[TMP6]]
// Exercises vld2_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld2.v1i64.p0 whose two-vector result is stored to a
// temporary, copied (16 bytes, align 8) into the return slot, and returned
// as %struct.poly64x1x2_t.
226*207e5cccSFangrui Song poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
227*207e5cccSFangrui Song   return vld2_p64(ptr);
228*207e5cccSFangrui Song }
229*207e5cccSFangrui Song 
230*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_p64(ptr noundef %ptr) #0 {
231*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
232*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
233*207e5cccSFangrui Song // CHECK:   [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %ptr)
234*207e5cccSFangrui Song // CHECK:   store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]]
235*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
236*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
237*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x2x2_t [[TMP6]]
// Exercises vld2q_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld2.v2i64.p0 whose two-vector result is stored to a
// temporary, copied (32 bytes, align 16) into the return slot, and returned
// as %struct.poly64x2x2_t.
238*207e5cccSFangrui Song poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
239*207e5cccSFangrui Song   return vld2q_p64(ptr);
240*207e5cccSFangrui Song }
241*207e5cccSFangrui Song 
242*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_p64(ptr noundef %ptr) #0 {
243*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
244*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
245*207e5cccSFangrui Song // CHECK:   [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %ptr)
246*207e5cccSFangrui Song // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]]
247*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
248*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
249*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x1x3_t [[TMP6]]
// Exercises vld3_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld3.v1i64.p0 whose three-vector result is stored to a
// temporary, copied (24 bytes, align 8) into the return slot, and returned
// as %struct.poly64x1x3_t.
250*207e5cccSFangrui Song poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
251*207e5cccSFangrui Song   return vld3_p64(ptr);
252*207e5cccSFangrui Song }
253*207e5cccSFangrui Song 
254*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_p64(ptr noundef %ptr) #0 {
255*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
256*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
257*207e5cccSFangrui Song // CHECK:   [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %ptr)
258*207e5cccSFangrui Song // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]]
259*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
260*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
261*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x2x3_t [[TMP6]]
// Exercises vld3q_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld3.v2i64.p0 whose three-vector result is stored to a
// temporary, copied (48 bytes, align 16) into the return slot, and returned
// as %struct.poly64x2x3_t.
262*207e5cccSFangrui Song poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
263*207e5cccSFangrui Song   return vld3q_p64(ptr);
264*207e5cccSFangrui Song }
265*207e5cccSFangrui Song 
266*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_p64(ptr noundef %ptr) #0 {
267*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
268*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
269*207e5cccSFangrui Song // CHECK:   [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %ptr)
270*207e5cccSFangrui Song // CHECK:   store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
271*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
272*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
273*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x1x4_t [[TMP6]]
// Exercises vld4_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld4.v1i64.p0 whose four-vector result is stored to a
// temporary, copied (32 bytes, align 8) into the return slot, and returned
// as %struct.poly64x1x4_t.
274*207e5cccSFangrui Song poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
275*207e5cccSFangrui Song   return vld4_p64(ptr);
276*207e5cccSFangrui Song }
277*207e5cccSFangrui Song 
278*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_p64(ptr noundef %ptr) #0 {
279*207e5cccSFangrui Song // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
280*207e5cccSFangrui Song // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
281*207e5cccSFangrui Song // CHECK:   [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %ptr)
282*207e5cccSFangrui Song // CHECK:   store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
283*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
284*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
285*207e5cccSFangrui Song // CHECK:   ret %struct.poly64x2x4_t [[TMP6]]
// Exercises vld4q_p64. The directives above expect a call to
// @llvm.aarch64.neon.ld4.v2i64.p0 whose four-vector result is stored to a
// temporary, copied (64 bytes, align 16) into the return slot, and returned
// as %struct.poly64x2x4_t.
286*207e5cccSFangrui Song poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
287*207e5cccSFangrui Song   return vld4q_p64(ptr);
288*207e5cccSFangrui Song }
289*207e5cccSFangrui Song 
290*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2_p64(ptr noundef %ptr, [2 x <1 x i64>] alignstack(8) %val.coerce) #0 {
291*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
292*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
293*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[VAL]], i32 0, i32 0
294*207e5cccSFangrui Song // CHECK:   store [2 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
295*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 16, i1 false)
296*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
297*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
298*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
299*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
300*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0
301*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
302*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
303*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
304*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
305*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
306*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %ptr)
307*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst2_p64. Per the directives above, `val` arrives coerced as
// [2 x <1 x i64>], is spilled and copied (16 bytes) to a local, and each of
// its two vectors is loaded, bitcast through <8 x i8>, and passed to
// @llvm.aarch64.neon.st2.v1i64.p0 together with %ptr.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
308*207e5cccSFangrui Song void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
309*207e5cccSFangrui Song   return vst2_p64(ptr, val);
310*207e5cccSFangrui Song }
311*207e5cccSFangrui Song 
312*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst2q_p64(ptr noundef %ptr, [2 x <2 x i64>] alignstack(16) %val.coerce) #0 {
313*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
314*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
315*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[VAL]], i32 0, i32 0
316*207e5cccSFangrui Song // CHECK:   store [2 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
317*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 32, i1 false)
318*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
319*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
320*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
321*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
322*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
323*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
324*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
325*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
326*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
327*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
328*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %ptr)
329*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst2q_p64. Per the directives above, `val` arrives coerced as
// [2 x <2 x i64>], is spilled and copied (32 bytes) to a local, and each of
// its two vectors is loaded, bitcast through <16 x i8>, and passed to
// @llvm.aarch64.neon.st2.v2i64.p0 together with %ptr.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
330*207e5cccSFangrui Song void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
331*207e5cccSFangrui Song   return vst2q_p64(ptr, val);
332*207e5cccSFangrui Song }
333*207e5cccSFangrui Song 
334*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3_p64(ptr noundef %ptr, [3 x <1 x i64>] alignstack(8) %val.coerce) #0 {
335*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
336*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
337*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[VAL]], i32 0, i32 0
338*207e5cccSFangrui Song // CHECK:   store [3 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
339*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 24, i1 false)
340*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
341*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
342*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
343*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
344*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
345*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
346*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
347*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
348*207e5cccSFangrui Song // CHECK:   [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0
349*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2
350*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8
351*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
352*207e5cccSFangrui Song // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
353*207e5cccSFangrui Song // CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
354*207e5cccSFangrui Song // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
355*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %ptr)
356*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst3_p64. Per the directives above, `val` arrives coerced as
// [3 x <1 x i64>], is spilled and copied (24 bytes) to a local, and each of
// its three vectors is loaded, bitcast through <8 x i8>, and passed to
// @llvm.aarch64.neon.st3.v1i64.p0 together with %ptr.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
357*207e5cccSFangrui Song void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
358*207e5cccSFangrui Song   return vst3_p64(ptr, val);
359*207e5cccSFangrui Song }
360*207e5cccSFangrui Song 
361*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst3q_p64(ptr noundef %ptr, [3 x <2 x i64>] alignstack(16) %val.coerce) #0 {
362*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
363*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
364*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[VAL]], i32 0, i32 0
365*207e5cccSFangrui Song // CHECK:   store [3 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
366*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 48, i1 false)
367*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
368*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
369*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
370*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
371*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
372*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
373*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
374*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
375*207e5cccSFangrui Song // CHECK:   [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0
376*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2
377*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16
378*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
379*207e5cccSFangrui Song // CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
380*207e5cccSFangrui Song // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
381*207e5cccSFangrui Song // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
382*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %ptr)
383*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst3q_p64. Per the directives above, `val` arrives coerced as
// [3 x <2 x i64>], is spilled and copied (48 bytes) to a local, and each of
// its three vectors is loaded, bitcast through <16 x i8>, and passed to
// @llvm.aarch64.neon.st3.v2i64.p0 together with %ptr.
// NOTE(review): `return <void expression>;` inside a void function is
// accepted by clang as an extension; ISO C does not permit it. Left as is
// because this test pins the emitted IR.
384*207e5cccSFangrui Song void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
385*207e5cccSFangrui Song   return vst3q_p64(ptr, val);
386*207e5cccSFangrui Song }
387*207e5cccSFangrui Song 
388*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4_p64(ptr noundef %ptr, [4 x <1 x i64>] alignstack(8) %val.coerce) #0 {
389*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
390*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
391*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[VAL]], i32 0, i32 0
392*207e5cccSFangrui Song // CHECK:   store [4 x <1 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 8
393*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[VAL]], i64 32, i1 false)
394*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
395*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 0
396*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
397*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
398*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
399*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL2]], i64 0, i64 1
400*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX3]], align 8
401*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
402*207e5cccSFangrui Song // CHECK:   [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
403*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL4]], i64 0, i64 2
404*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX5]], align 8
405*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
406*207e5cccSFangrui Song // CHECK:   [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0
407*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL6]], i64 0, i64 3
408*207e5cccSFangrui Song // CHECK:   [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX7]], align 8
409*207e5cccSFangrui Song // CHECK:   [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
410*207e5cccSFangrui Song // CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
411*207e5cccSFangrui Song // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
412*207e5cccSFangrui Song // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
413*207e5cccSFangrui Song // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
414*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %ptr)
415*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst4_p64 by forwarding ptr and val to the intrinsic. The
// FileCheck directives above expect the four <1 x i64> members of val to be
// loaded from a local copy and passed, together with %ptr, to
// @llvm.aarch64.neon.st4.v1i64.p0, followed by `ret void`.
// The intrinsic is invoked as a plain statement: ISO C does not allow a
// return statement with an expression in a function returning void.
416*207e5cccSFangrui Song void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
417*207e5cccSFangrui Song   vst4_p64(ptr, val);
418*207e5cccSFangrui Song }
419*207e5cccSFangrui Song 
420*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} void @test_vst4q_p64(ptr noundef %ptr, [4 x <2 x i64>] alignstack(16) %val.coerce) #0 {
421*207e5cccSFangrui Song // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
422*207e5cccSFangrui Song // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
423*207e5cccSFangrui Song // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[VAL]], i32 0, i32 0
424*207e5cccSFangrui Song // CHECK:   store [4 x <2 x i64>] [[VAL]].coerce, ptr [[COERCE_DIVE]], align 16
425*207e5cccSFangrui Song // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[VAL]], i64 64, i1 false)
426*207e5cccSFangrui Song // CHECK:   [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
427*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 0
428*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
429*207e5cccSFangrui Song // CHECK:   [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
430*207e5cccSFangrui Song // CHECK:   [[VAL2:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
431*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL2]], i64 0, i64 1
432*207e5cccSFangrui Song // CHECK:   [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX3]], align 16
433*207e5cccSFangrui Song // CHECK:   [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
434*207e5cccSFangrui Song // CHECK:   [[VAL4:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
435*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL4]], i64 0, i64 2
436*207e5cccSFangrui Song // CHECK:   [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX5]], align 16
437*207e5cccSFangrui Song // CHECK:   [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
438*207e5cccSFangrui Song // CHECK:   [[VAL6:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0
439*207e5cccSFangrui Song // CHECK:   [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL6]], i64 0, i64 3
440*207e5cccSFangrui Song // CHECK:   [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX7]], align 16
441*207e5cccSFangrui Song // CHECK:   [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
442*207e5cccSFangrui Song // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
443*207e5cccSFangrui Song // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
444*207e5cccSFangrui Song // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
445*207e5cccSFangrui Song // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
446*207e5cccSFangrui Song // CHECK:   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %ptr)
447*207e5cccSFangrui Song // CHECK:   ret void
// Exercises vst4q_p64 by forwarding ptr and val to the intrinsic. The
// FileCheck directives above expect the four <2 x i64> members of val to be
// loaded from a local copy and passed, together with %ptr, to
// @llvm.aarch64.neon.st4.v2i64.p0, followed by `ret void`.
// The intrinsic is invoked as a plain statement: ISO C does not allow a
// return statement with an expression in a function returning void.
448*207e5cccSFangrui Song void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
449*207e5cccSFangrui Song   vst4q_p64(ptr, val);
450*207e5cccSFangrui Song }
451*207e5cccSFangrui Song 
452*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
453*207e5cccSFangrui Song // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
454*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
455*207e5cccSFangrui Song // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
456*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
457*207e5cccSFangrui Song // CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
458*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VEXT]]
// Exercises vext_p64 with lane index 0 on two poly64x1_t operands. The
// FileCheck directives above expect both operands to round-trip through
// <8 x i8> bitcasts and then feed a shufflevector with a zeroinitializer
// mask whose result is returned.
// The poly64 intrinsic is called directly so the operands and the result keep
// their poly64x1_t type instead of being converted to and from uint64x1_t.
459*207e5cccSFangrui Song poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
460*207e5cccSFangrui Song   return vext_p64(a, b, 0);
462*207e5cccSFangrui Song }
463*207e5cccSFangrui Song 
464*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
465*207e5cccSFangrui Song // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
466*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
467*207e5cccSFangrui Song // CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
468*207e5cccSFangrui Song // CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
469*207e5cccSFangrui Song // CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
470*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VEXT]]
// Exercises vextq_p64 with lane index 1 on two poly64x2_t operands. The
// FileCheck directives above expect both operands to round-trip through
// <16 x i8> bitcasts and then feed a shufflevector with mask
// <i32 1, i32 2> whose result is returned.
471*207e5cccSFangrui Song poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
472*207e5cccSFangrui Song   return vextq_p64(a, b, 1);
473*207e5cccSFangrui Song }
474*207e5cccSFangrui Song 
475*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
476*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
477*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vzip1q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 0, i32 2>, whose result is returned.
478*207e5cccSFangrui Song poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
479*207e5cccSFangrui Song   return vzip1q_p64(a, b);
480*207e5cccSFangrui Song }
481*207e5cccSFangrui Song 
482*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
483*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
484*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vzip2q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 1, i32 3>, whose result is returned.
// The poly64 intrinsic is called directly so the operands and the result keep
// their poly64x2_t type instead of being converted to and from uint64x2_t.
485*207e5cccSFangrui Song poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
486*207e5cccSFangrui Song   return vzip2q_p64(a, b);
487*207e5cccSFangrui Song }
488*207e5cccSFangrui Song 
489*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
490*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
491*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vuzp1q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 0, i32 2>, whose result is returned.
492*207e5cccSFangrui Song poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
493*207e5cccSFangrui Song   return vuzp1q_p64(a, b);
494*207e5cccSFangrui Song }
495*207e5cccSFangrui Song 
496*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
497*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
498*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vuzp2q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 1, i32 3>, whose result is returned.
// The poly64 intrinsic is called directly so the operands and the result keep
// their poly64x2_t type instead of being converted to and from uint64x2_t.
499*207e5cccSFangrui Song poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
500*207e5cccSFangrui Song   return vuzp2q_p64(a, b);
501*207e5cccSFangrui Song }
502*207e5cccSFangrui Song 
503*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn1q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
504*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
505*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn1q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 0, i32 2>, whose result is returned.
506*207e5cccSFangrui Song poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
507*207e5cccSFangrui Song   return vtrn1q_p64(a, b);
508*207e5cccSFangrui Song }
509*207e5cccSFangrui Song 
510*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn2q_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
511*207e5cccSFangrui Song // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
512*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn2q_p64 on two poly64x2_t operands. The FileCheck directives
// above expect a single shufflevector of %a and %b with mask
// <i32 1, i32 3>, whose result is returned.
// The poly64 intrinsic is called directly so the operands and the result keep
// their poly64x2_t type instead of being converted to and from uint64x2_t.
513*207e5cccSFangrui Song poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
514*207e5cccSFangrui Song   return vtrn2q_p64(a, b);
515*207e5cccSFangrui Song }
516*207e5cccSFangrui Song 
517*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <1 x i64> @test_vsri_n_p64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
518*207e5cccSFangrui Song // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
519*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
520*207e5cccSFangrui Song // CHECK:   [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
521*207e5cccSFangrui Song // CHECK:   [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
522*207e5cccSFangrui Song // CHECK:   [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
523*207e5cccSFangrui Song // CHECK:   ret <1 x i64> [[VSRI_N2]]
// Exercises vsri_n_p64 with an immediate of 33 on two poly64x1_t operands.
// The FileCheck directives above expect both operands to round-trip through
// <8 x i8> bitcasts and then be passed, with `i32 33`, to
// @llvm.aarch64.neon.vsri.v1i64, whose result is returned.
524*207e5cccSFangrui Song poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
525*207e5cccSFangrui Song   return vsri_n_p64(a, b, 33);
526*207e5cccSFangrui Song }
527*207e5cccSFangrui Song 
528*207e5cccSFangrui Song // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsriq_n_p64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
529*207e5cccSFangrui Song // CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
530*207e5cccSFangrui Song // CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
531*207e5cccSFangrui Song // CHECK:   [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
532*207e5cccSFangrui Song // CHECK:   [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
533*207e5cccSFangrui Song // CHECK:   [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
534*207e5cccSFangrui Song // CHECK:   ret <2 x i64> [[VSRI_N2]]
// Exercises vsriq_n_p64 with an immediate of 64 on two poly64x2_t operands.
// The FileCheck directives above expect both operands to round-trip through
// <16 x i8> bitcasts and then be passed, with `i32 64`, to
// @llvm.aarch64.neon.vsri.v2i64, whose result is returned.
// NOTE(review): 64 is presumably the largest immediate accepted for 64-bit
// lanes -- confirm against the ACLE intrinsic reference.
535*207e5cccSFangrui Song poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
536*207e5cccSFangrui Song   return vsriq_n_p64(a, b, 64);
537*207e5cccSFangrui Song }
538