// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
// RUN:  -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>


// CHECK-LABEL: @test_vldap1q_lane_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
//
uint64x2_t test_vldap1q_lane_u64(uint64_t  *a, uint64x2_t b) {
  return vldap1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
//
int64x2_t test_vldap1q_lane_s64(int64_t  *a, int64x2_t b) {
  return vldap1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
// CHECK-NEXT:    ret <2 x double> [[VLDAP1_LANE]]
//
float64x2_t test_vldap1q_lane_f64(float64_t  *a, float64x2_t b) {
  return vldap1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_p64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT:    ret <2 x i64> [[VLDAP1_LANE]]
//
poly64x2_t test_vldap1q_lane_p64(poly64_t  *a, poly64x2_t b) {
  return vldap1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1_lane_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
//
uint64x1_t test_vldap1_lane_u64(uint64_t  *a, uint64x1_t b) {
  return vldap1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
//
int64x1_t test_vldap1_lane_s64(int64_t  *a, int64x1_t b) {
  return vldap1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
// CHECK-NEXT:    ret <1 x double> [[VLDAP1_LANE]]
//
float64x1_t test_vldap1_lane_f64(float64_t  *a, float64x1_t b) {
  return vldap1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_p64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT:    [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT:    ret <1 x i64> [[VLDAP1_LANE]]
//
poly64x1_t test_vldap1_lane_p64(poly64_t  *a, poly64x1_t b) {
  return vldap1_lane_p64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1q_lane_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1q_lane_u64(uint64_t  *a, uint64x2_t b) {
  vstl1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1q_lane_s64(int64_t  *a, int64x2_t b) {
  vstl1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK-NEXT:    store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1q_lane_f64(float64_t  *a, float64x2_t b) {
  vstl1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_p64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1q_lane_p64(poly64_t  *a, poly64x2_t b) {
  vstl1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1_lane_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1_lane_u64(uint64_t  *a, uint64x1_t b) {
  vstl1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_s64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1_lane_s64(int64_t  *a, int64x1_t b) {
  vstl1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_f64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK-NEXT:    store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1_lane_f64(float64_t  *a, float64x1_t b) {
  vstl1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_p64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT:    store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT:    ret void
//
void test_vstl1_lane_p64(poly64_t  *a, poly64x1_t b) {
  vstl1_lane_p64(a, b, 0);
}