xref: /llvm-project/clang/test/CodeGen/AArch64/neon-luti.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
2 // REQUIRES: aarch64-registered-target
3 #include <arm_neon.h>
4 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
5 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
6 
7 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
8 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9 // CHECK-NEXT:  entry:
10 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
11 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
12 //
// Test wrapper for vluti2_lane_u8: passes vn and vm through with the constant
// 0 as the third argument and returns the intrinsic's result. The generated
// assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8 ending in i32 0.
13 uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
14   return vluti2_lane_u8(vn, vm, 0);
15 }
16 
17 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
18 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
19 // CHECK-NEXT:  entry:
20 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
21 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
22 //
// Test wrapper for vluti2_laneq_u8: passes vn and the 16-element vm through
// with the constant 0 as the third argument and returns the result. The
// generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8 ending in i32 0.
23 uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
24   return vluti2_laneq_u8(vn, vm, 0);
25 }
26 
27 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
28 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
29 // CHECK-NEXT:  entry:
30 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
31 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
32 //
// Test wrapper for vluti2q_lane_u8: passes the 16-element vn and vm through
// with the constant 1 as the third argument and returns the result. The
// generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8 ending in i32 1.
33 uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
34   return vluti2q_lane_u8(vn, vm, 1);
35 }
36 
37 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
38 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
39 // CHECK-NEXT:  entry:
40 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
41 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
42 //
// Test wrapper for vluti2q_laneq_u8: passes vn and vm (both 16-element)
// through with the constant 3 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8 ending in i32 3.
43 uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
44   return vluti2q_laneq_u8(vn, vm, 3);
45 }
46 
47 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
48 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
49 // CHECK-NEXT:  entry:
50 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
51 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
52 //
// Test wrapper for vluti2_lane_s8: passes the signed vn and unsigned vm
// through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8 ending in i32 0.
53 int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
54   return vluti2_lane_s8(vn, vm, 0);
55 }
56 
57 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
58 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
59 // CHECK-NEXT:  entry:
60 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
61 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
62 //
// Test wrapper for vluti2_laneq_s8: passes the signed vn and the 16-element
// unsigned vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8 ending in i32 0.
63 int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
64   return vluti2_laneq_s8(vn, vm, 0);
65 }
66 
67 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
68 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
69 // CHECK-NEXT:  entry:
70 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
71 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
72 //
// Test wrapper for vluti2q_lane_s8: passes the 16-element signed vn and vm
// through with the constant 1 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8 ending in i32 1.
73 int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
74   return vluti2q_lane_s8(vn, vm, 1);
75 }
76 
77 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
78 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
79 // CHECK-NEXT:  entry:
80 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
81 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
82 //
// Test wrapper for vluti2q_laneq_s8: passes the 16-element signed vn and the
// 16-element vm through with the constant 3 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8 ending in i32 3.
83 int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
84   return vluti2q_laneq_s8(vn, vm, 3);
85 }
86 
87 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
88 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
89 // CHECK-NEXT:  entry:
90 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
91 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
92 //
// Test wrapper for vluti2_lane_p8: passes the polynomial vn and unsigned vm
// through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8 ending in i32 0.
93 poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
94   return vluti2_lane_p8(vn, vm, 0);
95 }
96 
97 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
98 // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
99 // CHECK-NEXT:  entry:
100 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
101 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
102 //
// Test wrapper for vluti2_laneq_p8: passes the polynomial vn and the
// 16-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8 ending in i32 0.
103 poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
104   return vluti2_laneq_p8(vn, vm, 0);
105 }
106 
107 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
108 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
109 // CHECK-NEXT:  entry:
110 // CHECK-NEXT:    [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
111 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANE]]
112 //
// Test wrapper for vluti2q_lane_p8: passes the 16-element polynomial vn and vm
// through with the constant 1 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8 ending in i32 1.
113 poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
114   return vluti2q_lane_p8(vn, vm, 1);
115 }
116 
117 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
118 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
119 // CHECK-NEXT:  entry:
120 // CHECK-NEXT:    [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
121 // CHECK-NEXT:    ret <16 x i8> [[VLUTI2_LANEQ]]
122 //
// Test wrapper for vluti2q_laneq_p8: passes the 16-element polynomial vn and
// the 16-element vm through with the constant 3 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8 ending in i32 3.
123 poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
124   return vluti2q_laneq_p8(vn, vm, 3);
125 }
126 
127 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
128 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
129 // CHECK-NEXT:  entry:
130 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
131 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
132 //
// Test wrapper for vluti2_lane_u16: passes the 4-element 16-bit vn and vm
// through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16 ending in i32 0.
133 uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
134   return vluti2_lane_u16(vn, vm, 0);
135 }
136 
137 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16(
138 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
139 // CHECK-NEXT:  entry:
140 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
141 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
142 //
// Test wrapper for vluti2_laneq_u16: passes the 4-element 16-bit vn and the
// 16-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16 ending in i32 0.
143 uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) {
144   return vluti2_laneq_u16(vn, vm, 0);
145 }
146 
147 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16(
148 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
149 // CHECK-NEXT:  entry:
150 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
151 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
152 //
// Test wrapper for vluti2q_lane_u16: passes the 8-element 16-bit vn and vm
// through with the constant 3 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16 ending in i32 3.
153 uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) {
154   return vluti2q_lane_u16(vn, vm, 3);
155 }
156 
157 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16(
158 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
159 // CHECK-NEXT:  entry:
160 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
161 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
162 //
// Test wrapper for vluti2q_laneq_u16: passes the 8-element 16-bit vn and the
// 16-element vm through with the constant 7 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16 ending in i32 7.
163 uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) {
164   return vluti2q_laneq_u16(vn, vm, 7);
165 }
166 
167 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16(
168 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
169 // CHECK-NEXT:  entry:
170 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
171 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
172 //
// Test wrapper for vluti2_lane_s16: passes the 4-element signed 16-bit vn and
// vm through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16 ending in i32 0.
173 int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) {
174   return vluti2_lane_s16(vn, vm, 0);
175 }
176 
177 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16(
178 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
179 // CHECK-NEXT:  entry:
180 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
181 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
182 //
// Test wrapper for vluti2_laneq_s16: passes the 4-element signed 16-bit vn and
// the 16-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16 ending in i32 0.
183 int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) {
184   return vluti2_laneq_s16(vn, vm, 0);
185 }
186 
187 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16(
188 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
189 // CHECK-NEXT:  entry:
190 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
191 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
192 //
// Test wrapper for vluti2q_lane_s16: passes the 8-element signed 16-bit vn and
// vm through with the constant 3 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16 ending in i32 3.
193 int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) {
194   return vluti2q_lane_s16(vn, vm, 3);
195 }
196 
197 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16(
198 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
199 // CHECK-NEXT:  entry:
200 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
201 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
202 //
// Test wrapper for vluti2q_laneq_s16: passes the 8-element signed 16-bit vn
// and the 16-element vm through with the constant 7 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16 ending in i32 7.
203 int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) {
204   return vluti2q_laneq_s16(vn, vm, 7);
205 }
206 
207 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16(
208 // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
209 // CHECK-NEXT:  entry:
210 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> [[VN]], <8 x i8> [[VM]], i32 0)
211 // CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANE1]]
212 //
// Test wrapper for vluti2_lane_f16: passes the 4-element half-precision vn and
// vm through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16 ending in i32 0.
213 float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
214   return vluti2_lane_f16(vn, vm, 0);
215 }
216 
217 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16(
218 // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
219 // CHECK-NEXT:  entry:
220 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> [[VN]], <16 x i8> [[VM]], i32 0)
221 // CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANEQ1]]
222 //
// Test wrapper for vluti2_laneq_f16: passes the 4-element half-precision vn
// and the 16-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16 ending in i32 0.
223 float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
224   return vluti2_laneq_f16(vn, vm, 0);
225 }
226 
227 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16(
228 // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
229 // CHECK-NEXT:  entry:
230 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> [[VN]], <8 x i8> [[VM]], i32 3)
231 // CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANE1]]
232 //
// Test wrapper for vluti2q_lane_f16: passes the 8-element half-precision vn
// and vm through with the constant 3 as the third argument and returns the
// result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16 ending in i32 3.
233 float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
234   return vluti2q_lane_f16(vn, vm, 3);
235 }
236 
237 // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16(
238 // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
239 // CHECK-NEXT:  entry:
240 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> [[VN]], <16 x i8> [[VM]], i32 7)
241 // CHECK-NEXT:    ret <8 x half> [[VLUTI2_LANEQ1]]
242 //
// Test wrapper for vluti2q_laneq_f16: passes the 8-element half-precision vn
// and the 16-element vm through with the constant 7 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16 ending in i32 7.
243 float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
244   return vluti2q_laneq_f16(vn, vm, 7);
245 }
246 
247 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16(
248 // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
249 // CHECK-NEXT:  entry:
250 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> [[VN]], <8 x i8> [[VM]], i32 0)
251 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANE1]]
252 //
// Test wrapper for vluti2_lane_bf16: passes the 4-element bfloat vn and vm
// through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16 ending in i32 0.
253 bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
254   return vluti2_lane_bf16(vn, vm, 0);
255 }
256 
257 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16(
258 // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
259 // CHECK-NEXT:  entry:
260 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> [[VN]], <16 x i8> [[VM]], i32 0)
261 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANEQ1]]
262 //
// Test wrapper for vluti2_laneq_bf16: passes the 4-element bfloat vn and the
// 16-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16 ending in i32 0.
263 bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
264   return vluti2_laneq_bf16(vn, vm, 0);
265 }
266 
267 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16(
268 // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
269 // CHECK-NEXT:  entry:
270 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> [[VN]], <8 x i8> [[VM]], i32 3)
271 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANE1]]
272 //
// Test wrapper for vluti2q_lane_bf16: passes the 8-element bfloat vn and vm
// through with the constant 3 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16 ending in i32 3.
273 bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
274   return vluti2q_lane_bf16(vn, vm, 3);
275 }
276 
277 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16(
278 // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
279 // CHECK-NEXT:  entry:
280 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> [[VN]], <16 x i8> [[VM]], i32 7)
281 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI2_LANEQ1]]
282 //
// Test wrapper for vluti2q_laneq_bf16: passes the 8-element bfloat vn and the
// 16-element vm through with the constant 7 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16 ending in i32 7.
283 bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
284   return vluti2q_laneq_bf16(vn, vm, 7);
285 }
286 
287 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16(
288 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
289 // CHECK-NEXT:  entry:
290 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
291 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
292 //
// Test wrapper for vluti2_lane_p16: passes the 4-element polynomial 16-bit vn
// and vm through with the constant 0 as the third argument and returns the
// result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16 ending in i32 0.
293 poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
294   return vluti2_lane_p16(vn, vm, 0);
295 }
296 
297 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_p16(
298 // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
299 // CHECK-NEXT:  entry:
300 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
301 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
302 //
// Test wrapper for vluti2_laneq_p16: passes the 4-element polynomial 16-bit vn
// and the 16-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16 ending in i32 0.
303 poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
304   return vluti2_laneq_p16(vn, vm, 0);
305 }
306 
307 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_p16(
308 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
309 // CHECK-NEXT:  entry:
310 // CHECK-NEXT:    [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
311 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANE1]]
312 //
// Test wrapper for vluti2q_lane_p16: passes the 8-element polynomial 16-bit vn
// and vm through with the constant 3 as the third argument and returns the
// result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16 ending in i32 3.
313 poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
314   return vluti2q_lane_p16(vn, vm, 3);
315 }
316 
317 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_p16(
318 // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
319 // CHECK-NEXT:  entry:
320 // CHECK-NEXT:    [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
321 // CHECK-NEXT:    ret <8 x i16> [[VLUTI2_LANEQ1]]
322 //
// Test wrapper for vluti2q_laneq_p16: passes the 8-element polynomial 16-bit
// vn and the 16-element vm through with the constant 7 as the third argument
// and returns the result. The generated assertions above expect a single tail
// call to @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16 ending in i32 7.
323 poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
324   return vluti2q_laneq_p16(vn, vm, 7);
325 }
326 
327 //
328 
329 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_u8(
330 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
331 // CHECK-NEXT:  entry:
332 // CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
333 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
334 //
// Test wrapper for vluti4q_lane_u8: passes the 16-element vn and the 8-element
// vm through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti4q.lane.v16i8 ending in i32 0.
335 uint8x16_t test_vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
336   return vluti4q_lane_u8(vn, vm, 0);
337 }
338 
339 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_u8(
340 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
341 // CHECK-NEXT:  entry:
342 // CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
343 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
344 //
// Test wrapper for vluti4q_laneq_u8: passes vn and vm (both 16-element)
// through with the constant 0 as the third argument and returns the result.
// The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti4q.laneq.v16i8 ending in i32 0.
345 uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
346   return vluti4q_laneq_u8(vn, vm, 0);
347 }
348 
349 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8(
350 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
351 // CHECK-NEXT:  entry:
352 // CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
353 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
354 //
// Test wrapper for vluti4q_lane_s8: passes the 16-element signed vn and the
// 8-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti4q.lane.v16i8 ending in i32 0.
355 int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) {
356   return vluti4q_lane_s8(vn, vm, 0);
357 }
358 
359 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8(
360 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
361 // CHECK-NEXT:  entry:
362 // CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
363 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
364 //
// Test wrapper for vluti4q_laneq_s8: passes the 16-element signed vn and the
// 16-element vm through with the constant 1 as the third argument and returns
// the result. The generated assertions above expect a single tail call to
// @llvm.aarch64.neon.vluti4q.laneq.v16i8 ending in i32 1.
365 int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
366   return vluti4q_laneq_s8(vn, vm, 1);
367 }
368 
369 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8(
370 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
371 // CHECK-NEXT:  entry:
372 // CHECK-NEXT:    [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
373 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANE]]
374 //
// Test wrapper for vluti4q_lane_p8: passes the 16-element polynomial vn and
// the 8-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti4q.lane.v16i8 ending in i32 0.
375 poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
376   return vluti4q_lane_p8(vn, vm, 0);
377 }
378 
379 // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8(
380 // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
381 // CHECK-NEXT:  entry:
382 // CHECK-NEXT:    [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
383 // CHECK-NEXT:    ret <16 x i8> [[VLUTI4Q_LANEQ]]
384 //
// Test wrapper for vluti4q_laneq_p8: passes the 16-element polynomial vn and
// the 16-element vm through with the constant 1 as the third argument and
// returns the result. The generated assertions above expect a single tail call
// to @llvm.aarch64.neon.vluti4q.laneq.v16i8 ending in i32 1.
385 poly8x16_t test_vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
386   return vluti4q_laneq_p8(vn, vm, 1);
387 }
388 
389 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_u16_x2(
390 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
391 // CHECK-NEXT:  entry:
392 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
393 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
394 // CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
395 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
396 //
// Test wrapper for vluti4q_lane_u16_x2: passes the two-vector vn and the
// 8-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect vn's two elements to be
// extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.lane.x2.v8i16 ending in i32 0.
397 uint16x8_t test_vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm) {
398   return vluti4q_lane_u16_x2(vn, vm, 0);
399 }
400 
401 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_u16_x2(
402 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
403 // CHECK-NEXT:  entry:
404 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
405 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
406 // CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
407 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
408 //
// Test wrapper for vluti4q_laneq_u16_x2: passes the two-vector vn and the
// 16-element vm through with the constant 0 as the third argument and returns
// the result. The generated assertions above expect vn's two elements to be
// extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16 ending in i32 0.
409 uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) {
410   return vluti4q_laneq_u16_x2(vn, vm, 0);
411 }
412 
413 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_s16_x2(
414 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
415 // CHECK-NEXT:  entry:
416 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
417 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
418 // CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
419 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
420 //
// Test wrapper for vluti4q_lane_s16_x2: passes the two-vector signed vn and
// the 8-element vm through with the constant 1 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.lane.x2.v8i16 ending in i32 1.
421 int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) {
422   return vluti4q_lane_s16_x2(vn, vm, 1);
423 }
424 
425 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2(
426 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
427 // CHECK-NEXT:  entry:
428 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
429 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
430 // CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 3)
431 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
432 //
// Test wrapper for vluti4q_laneq_s16_x2: passes the two-vector signed vn and
// the 16-element vm through with the constant 3 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16 ending in i32 3.
433 int16x8_t test_vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm) {
434   return vluti4q_laneq_s16_x2(vn, vm, 3);
435 }
436 
437 // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_lane_f16_x2(
438 // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
439 // CHECK-NEXT:  entry:
440 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
441 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
442 // CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
443 // CHECK-NEXT:    ret <8 x half> [[VLUTI4Q_LANE_X24]]
444 //
// Test wrapper for vluti4q_lane_f16_x2: passes the two-vector half-precision
// vn and the 8-element vm through with the constant 1 as the third argument
// and returns the result. The generated assertions above expect vn's two
// elements to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.lane.x2.v8f16 ending in i32 1.
445 float16x8_t test_vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm) {
446   return vluti4q_lane_f16_x2(vn, vm, 1);
447 }
448 
449 // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_laneq_f16_x2(
450 // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
451 // CHECK-NEXT:  entry:
452 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
453 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
454 // CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 1)
455 // CHECK-NEXT:    ret <8 x half> [[VLUTI4Q_LANEQ_X24]]
456 //
// Test wrapper for vluti4q_laneq_f16_x2: passes the two-vector half-precision
// vn and the 16-element vm through with the constant 1 as the third argument
// and returns the result. The generated assertions above expect vn's two
// elements to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16 ending in i32 1.
457 float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) {
458   return vluti4q_laneq_f16_x2(vn, vm, 1);
459 }
460 
461 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_lane_bf16_x2(
462 // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
463 // CHECK-NEXT:  entry:
464 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
465 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
466 // CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
467 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI4Q_LANE_X24]]
468 //
// Test wrapper for vluti4q_lane_bf16_x2: passes the two-vector bfloat vn and
// the 8-element vm through with the constant 1 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16 ending in i32 1.
469 bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) {
470   return vluti4q_lane_bf16_x2(vn, vm, 1);
471 }
472 
473 // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2(
474 // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
475 // CHECK-NEXT:  entry:
476 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
477 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
478 // CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 2)
479 // CHECK-NEXT:    ret <8 x bfloat> [[VLUTI4Q_LANEQ_X24]]
480 //
// Test wrapper for vluti4q_laneq_bf16_x2: passes the two-vector bfloat vn and
// the 16-element vm through with the constant 2 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16 ending in i32 2.
481 bfloat16x8_t test_vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm) {
482   return vluti4q_laneq_bf16_x2(vn, vm, 2);
483 }
484 
485 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_p16_x2(
486 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
487 // CHECK-NEXT:  entry:
488 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
489 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
490 // CHECK-NEXT:    [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
491 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANE_X24]]
492 //
// Test wrapper for vluti4q_lane_p16_x2: passes the two-vector polynomial vn
// and the 8-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.lane.x2.v8i16 ending in i32 0.
493 poly16x8_t test_vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm) {
494   return vluti4q_lane_p16_x2(vn, vm, 0);
495 }
496 
497 // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_p16_x2(
498 // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
499 // CHECK-NEXT:  entry:
500 // CHECK-NEXT:    [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
501 // CHECK-NEXT:    [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
502 // CHECK-NEXT:    [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
503 // CHECK-NEXT:    ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
504 //
// Test wrapper for vluti4q_laneq_p16_x2: passes the two-vector polynomial vn
// and the 16-element vm through with the constant 0 as the third argument and
// returns the result. The generated assertions above expect vn's two elements
// to be extracted and passed as separate operands of one tail call to
// @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16 ending in i32 0.
505 poly16x8_t test_vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm) {
506   return vluti4q_laneq_p16_x2(vn, vm, 0);
507 }
508