xref: /llvm-project/clang/test/CodeGen/AArch64/neon-ldst-one-rcpc3.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
3*207e5cccSFangrui Song // RUN:  -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \
4*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | FileCheck %s
5*207e5cccSFangrui Song 
6*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target
7*207e5cccSFangrui Song 
8*207e5cccSFangrui Song #include <arm_neon.h>
9*207e5cccSFangrui Song 
10*207e5cccSFangrui Song 
11*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_u64(
12*207e5cccSFangrui Song // CHECK-NEXT:  entry:
13*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
14*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
15*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
16*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
17*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
18*207e5cccSFangrui Song //
// Exercises vldap1q_lane_u64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of an i64 from `a` (align 8) whose value is inserted into
// element 1 of `b`; the resulting vector is returned.
19*207e5cccSFangrui Song uint64x2_t test_vldap1q_lane_u64(uint64_t  *a, uint64x2_t b) {
20*207e5cccSFangrui Song   return vldap1q_lane_u64(a, b, 1);
21*207e5cccSFangrui Song }
22*207e5cccSFangrui Song 
23*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_s64(
24*207e5cccSFangrui Song // CHECK-NEXT:  entry:
25*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
26*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
27*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
28*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
29*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
30*207e5cccSFangrui Song //
// Exercises vldap1q_lane_s64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of an i64 from `a` (align 8) whose value is inserted into
// element 1 of `b`; the resulting vector is returned.
31*207e5cccSFangrui Song int64x2_t test_vldap1q_lane_s64(int64_t  *a, int64x2_t b) {
32*207e5cccSFangrui Song   return vldap1q_lane_s64(a, b, 1);
33*207e5cccSFangrui Song }
34*207e5cccSFangrui Song 
35*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_f64(
36*207e5cccSFangrui Song // CHECK-NEXT:  entry:
37*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
38*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
39*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
40*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
41*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x double> [[VLDAP1_LANE]]
42*207e5cccSFangrui Song //
// Exercises vldap1q_lane_f64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of a double from `a` (align 8) whose value is inserted into
// element 1 of `b`; the resulting vector is returned.
43*207e5cccSFangrui Song float64x2_t test_vldap1q_lane_f64(float64_t  *a, float64x2_t b) {
44*207e5cccSFangrui Song   return vldap1q_lane_f64(a, b, 1);
45*207e5cccSFangrui Song }
46*207e5cccSFangrui Song 
47*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1q_lane_p64(
48*207e5cccSFangrui Song // CHECK-NEXT:  entry:
49*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
50*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
51*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
52*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
53*207e5cccSFangrui Song // CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
54*207e5cccSFangrui Song //
// Exercises vldap1q_lane_p64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, the poly64 vector appears as
// <2 x i64> in the IR: an atomic acquire load of an i64 from `a` (align 8)
// is inserted into element 1 of `b`, and the resulting vector is returned.
55*207e5cccSFangrui Song poly64x2_t test_vldap1q_lane_p64(poly64_t  *a, poly64x2_t b) {
56*207e5cccSFangrui Song   return vldap1q_lane_p64(a, b, 1);
57*207e5cccSFangrui Song }
58*207e5cccSFangrui Song 
59*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_u64(
60*207e5cccSFangrui Song // CHECK-NEXT:  entry:
61*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
62*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
63*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
64*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
65*207e5cccSFangrui Song // CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
66*207e5cccSFangrui Song //
// Exercises vldap1_lane_u64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of an i64 from `a` (align 8) whose value is inserted into
// element 0 of `b`; the resulting vector is returned.
67*207e5cccSFangrui Song uint64x1_t test_vldap1_lane_u64(uint64_t  *a, uint64x1_t b) {
68*207e5cccSFangrui Song   return vldap1_lane_u64(a, b, 0);
69*207e5cccSFangrui Song }
70*207e5cccSFangrui Song 
71*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_s64(
72*207e5cccSFangrui Song // CHECK-NEXT:  entry:
73*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
74*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
75*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
76*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
77*207e5cccSFangrui Song // CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
78*207e5cccSFangrui Song //
// Exercises vldap1_lane_s64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of an i64 from `a` (align 8) whose value is inserted into
// element 0 of `b`; the resulting vector is returned.
79*207e5cccSFangrui Song int64x1_t test_vldap1_lane_s64(int64_t  *a, int64x1_t b) {
80*207e5cccSFangrui Song   return vldap1_lane_s64(a, b, 0);
81*207e5cccSFangrui Song }
82*207e5cccSFangrui Song 
83*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_f64(
84*207e5cccSFangrui Song // CHECK-NEXT:  entry:
85*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
86*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
87*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
88*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
89*207e5cccSFangrui Song // CHECK-NEXT:    ret <1 x double> [[VLDAP1_LANE]]
90*207e5cccSFangrui Song //
// Exercises vldap1_lane_f64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, the call should lower to an atomic
// acquire load of a double from `a` (align 8) whose value is inserted into
// element 0 of `b`; the resulting vector is returned.
91*207e5cccSFangrui Song float64x1_t test_vldap1_lane_f64(float64_t  *a, float64x1_t b) {
92*207e5cccSFangrui Song   return vldap1_lane_f64(a, b, 0);
93*207e5cccSFangrui Song }
94*207e5cccSFangrui Song 
95*207e5cccSFangrui Song // CHECK-LABEL: @test_vldap1_lane_p64(
96*207e5cccSFangrui Song // CHECK-NEXT:  entry:
97*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
98*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
99*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
100*207e5cccSFangrui Song // CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
101*207e5cccSFangrui Song // CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
102*207e5cccSFangrui Song //
// Exercises vldap1_lane_p64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, the poly64 vector appears as
// <1 x i64> in the IR: an atomic acquire load of an i64 from `a` (align 8)
// is inserted into element 0 of `b`, and the resulting vector is returned.
103*207e5cccSFangrui Song poly64x1_t test_vldap1_lane_p64(poly64_t  *a, poly64x1_t b) {
104*207e5cccSFangrui Song   return vldap1_lane_p64(a, b, 0);
105*207e5cccSFangrui Song }
106*207e5cccSFangrui Song 
107*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_u64(
108*207e5cccSFangrui Song // CHECK-NEXT:  entry:
109*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
110*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
111*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
112*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
113*207e5cccSFangrui Song // CHECK-NEXT:    ret void
114*207e5cccSFangrui Song //
// Exercises vstl1q_lane_u64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, element 1 of `b` is extracted and
// written to `a` with an atomic release store of an i64 (align 8); the
// function returns nothing.
115*207e5cccSFangrui Song void test_vstl1q_lane_u64(uint64_t  *a, uint64x2_t b) {
116*207e5cccSFangrui Song   vstl1q_lane_u64(a, b, 1);
117*207e5cccSFangrui Song }
118*207e5cccSFangrui Song 
119*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_s64(
120*207e5cccSFangrui Song // CHECK-NEXT:  entry:
121*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
122*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
123*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
124*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
125*207e5cccSFangrui Song // CHECK-NEXT:    ret void
126*207e5cccSFangrui Song //
// Exercises vstl1q_lane_s64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, element 1 of `b` is extracted and
// written to `a` with an atomic release store of an i64 (align 8); the
// function returns nothing.
127*207e5cccSFangrui Song void test_vstl1q_lane_s64(int64_t  *a, int64x2_t b) {
128*207e5cccSFangrui Song   vstl1q_lane_s64(a, b, 1);
129*207e5cccSFangrui Song }
130*207e5cccSFangrui Song 
131*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_f64(
132*207e5cccSFangrui Song // CHECK-NEXT:  entry:
133*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
134*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
135*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
136*207e5cccSFangrui Song // CHECK-NEXT:    store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
137*207e5cccSFangrui Song // CHECK-NEXT:    ret void
138*207e5cccSFangrui Song //
// Exercises vstl1q_lane_f64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, element 1 of `b` is extracted and
// written to `a` with an atomic release store of a double (align 8); the
// function returns nothing.
139*207e5cccSFangrui Song void test_vstl1q_lane_f64(float64_t  *a, float64x2_t b) {
140*207e5cccSFangrui Song   vstl1q_lane_f64(a, b, 1);
141*207e5cccSFangrui Song }
142*207e5cccSFangrui Song 
143*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1q_lane_p64(
144*207e5cccSFangrui Song // CHECK-NEXT:  entry:
145*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
146*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
147*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
148*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
149*207e5cccSFangrui Song // CHECK-NEXT:    ret void
150*207e5cccSFangrui Song //
// Exercises vstl1q_lane_p64 with lane index 1 on a two-element vector.
// Per the FileCheck expectations above, the poly64 vector appears as
// <2 x i64> in the IR: element 1 of `b` is extracted and written to `a`
// with an atomic release store of an i64 (align 8); nothing is returned.
151*207e5cccSFangrui Song void test_vstl1q_lane_p64(poly64_t  *a, poly64x2_t b) {
152*207e5cccSFangrui Song   vstl1q_lane_p64(a, b, 1);
153*207e5cccSFangrui Song }
154*207e5cccSFangrui Song 
155*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_u64(
156*207e5cccSFangrui Song // CHECK-NEXT:  entry:
157*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
158*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
159*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
160*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
161*207e5cccSFangrui Song // CHECK-NEXT:    ret void
162*207e5cccSFangrui Song //
// Exercises vstl1_lane_u64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, element 0 of `b` is extracted and
// written to `a` with an atomic release store of an i64 (align 8); the
// function returns nothing.
163*207e5cccSFangrui Song void test_vstl1_lane_u64(uint64_t  *a, uint64x1_t b) {
164*207e5cccSFangrui Song   vstl1_lane_u64(a, b, 0);
165*207e5cccSFangrui Song }
166*207e5cccSFangrui Song 
167*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_s64(
168*207e5cccSFangrui Song // CHECK-NEXT:  entry:
169*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
170*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
171*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
172*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
173*207e5cccSFangrui Song // CHECK-NEXT:    ret void
174*207e5cccSFangrui Song //
// Exercises vstl1_lane_s64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, element 0 of `b` is extracted and
// written to `a` with an atomic release store of an i64 (align 8); the
// function returns nothing.
175*207e5cccSFangrui Song void test_vstl1_lane_s64(int64_t  *a, int64x1_t b) {
176*207e5cccSFangrui Song   vstl1_lane_s64(a, b, 0);
177*207e5cccSFangrui Song }
178*207e5cccSFangrui Song 
179*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_f64(
180*207e5cccSFangrui Song // CHECK-NEXT:  entry:
181*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
182*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
183*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
184*207e5cccSFangrui Song // CHECK-NEXT:    store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
185*207e5cccSFangrui Song // CHECK-NEXT:    ret void
186*207e5cccSFangrui Song //
// Exercises vstl1_lane_f64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, element 0 of `b` is extracted and
// written to `a` with an atomic release store of a double (align 8); the
// function returns nothing.
187*207e5cccSFangrui Song void test_vstl1_lane_f64(float64_t  *a, float64x1_t b) {
188*207e5cccSFangrui Song   vstl1_lane_f64(a, b, 0);
189*207e5cccSFangrui Song }
190*207e5cccSFangrui Song 
191*207e5cccSFangrui Song // CHECK-LABEL: @test_vstl1_lane_p64(
192*207e5cccSFangrui Song // CHECK-NEXT:  entry:
193*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
194*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
195*207e5cccSFangrui Song // CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
196*207e5cccSFangrui Song // CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
197*207e5cccSFangrui Song // CHECK-NEXT:    ret void
198*207e5cccSFangrui Song //
// Exercises vstl1_lane_p64 with lane index 0 on a single-element vector.
// Per the FileCheck expectations above, the poly64 vector appears as
// <1 x i64> in the IR: element 0 of `b` is extracted and written to `a`
// with an atomic release store of an i64 (align 8); nothing is returned.
199*207e5cccSFangrui Song void test_vstl1_lane_p64(poly64_t  *a, poly64x1_t b) {
200*207e5cccSFangrui Song   vstl1_lane_p64(a, b, 0);
201*207e5cccSFangrui Song }
202