// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target || arm-registered-target

#include <arm_mve.h>

9 // CHECK-LABEL: @test_vidupq_n_u8(
10 // CHECK-NEXT: entry:
11 // CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[A:%.*]], i32 4)
12 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
13 // CHECK-NEXT: ret <16 x i8> [[TMP1]]
14 //
test_vidupq_n_u8(uint32_t a)15 uint8x16_t test_vidupq_n_u8(uint32_t a)
16 {
17 #ifdef POLYMORPHIC
18 return vidupq_u8(a, 4);
19 #else /* POLYMORPHIC */
20 return vidupq_n_u8(a, 4);
21 #endif /* POLYMORPHIC */
22 }
23
24 // CHECK-LABEL: @test_vidupq_n_u16(
25 // CHECK-NEXT: entry:
26 // CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[A:%.*]], i32 1)
27 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
28 // CHECK-NEXT: ret <8 x i16> [[TMP1]]
29 //
test_vidupq_n_u16(uint32_t a)30 uint16x8_t test_vidupq_n_u16(uint32_t a)
31 {
32 #ifdef POLYMORPHIC
33 return vidupq_u16(a, 1);
34 #else /* POLYMORPHIC */
35 return vidupq_n_u16(a, 1);
36 #endif /* POLYMORPHIC */
37 }
38
39 // CHECK-LABEL: @test_vidupq_n_u32(
40 // CHECK-NEXT: entry:
41 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[A:%.*]], i32 4)
42 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
43 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
44 //
test_vidupq_n_u32(uint32_t a)45 uint32x4_t test_vidupq_n_u32(uint32_t a)
46 {
47 #ifdef POLYMORPHIC
48 return vidupq_u32(a, 4);
49 #else /* POLYMORPHIC */
50 return vidupq_n_u32(a, 4);
51 #endif /* POLYMORPHIC */
52 }
53
54 // CHECK-LABEL: @test_vddupq_n_u8(
55 // CHECK-NEXT: entry:
56 // CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[A:%.*]], i32 2)
57 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
58 // CHECK-NEXT: ret <16 x i8> [[TMP1]]
59 //
test_vddupq_n_u8(uint32_t a)60 uint8x16_t test_vddupq_n_u8(uint32_t a)
61 {
62 #ifdef POLYMORPHIC
63 return vddupq_u8(a, 2);
64 #else /* POLYMORPHIC */
65 return vddupq_n_u8(a, 2);
66 #endif /* POLYMORPHIC */
67 }
68
69 // CHECK-LABEL: @test_vddupq_n_u16(
70 // CHECK-NEXT: entry:
71 // CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[A:%.*]], i32 4)
72 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
73 // CHECK-NEXT: ret <8 x i16> [[TMP1]]
74 //
test_vddupq_n_u16(uint32_t a)75 uint16x8_t test_vddupq_n_u16(uint32_t a)
76 {
77 #ifdef POLYMORPHIC
78 return vddupq_u16(a, 4);
79 #else /* POLYMORPHIC */
80 return vddupq_n_u16(a, 4);
81 #endif /* POLYMORPHIC */
82 }
83
84 // CHECK-LABEL: @test_vddupq_n_u32(
85 // CHECK-NEXT: entry:
86 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[A:%.*]], i32 2)
87 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
88 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
89 //
test_vddupq_n_u32(uint32_t a)90 uint32x4_t test_vddupq_n_u32(uint32_t a)
91 {
92 #ifdef POLYMORPHIC
93 return vddupq_u32(a, 2);
94 #else /* POLYMORPHIC */
95 return vddupq_n_u32(a, 2);
96 #endif /* POLYMORPHIC */
97 }
98
99 // CHECK-LABEL: @test_viwdupq_n_u8(
100 // CHECK-NEXT: entry:
101 // CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
102 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
103 // CHECK-NEXT: ret <16 x i8> [[TMP1]]
104 //
test_viwdupq_n_u8(uint32_t a,uint32_t b)105 uint8x16_t test_viwdupq_n_u8(uint32_t a, uint32_t b)
106 {
107 #ifdef POLYMORPHIC
108 return viwdupq_u8(a, b, 4);
109 #else /* POLYMORPHIC */
110 return viwdupq_n_u8(a, b, 4);
111 #endif /* POLYMORPHIC */
112 }
113
114 // CHECK-LABEL: @test_viwdupq_n_u16(
115 // CHECK-NEXT: entry:
116 // CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 2)
117 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
118 // CHECK-NEXT: ret <8 x i16> [[TMP1]]
119 //
test_viwdupq_n_u16(uint32_t a,uint32_t b)120 uint16x8_t test_viwdupq_n_u16(uint32_t a, uint32_t b)
121 {
122 #ifdef POLYMORPHIC
123 return viwdupq_u16(a, b, 2);
124 #else /* POLYMORPHIC */
125 return viwdupq_n_u16(a, b, 2);
126 #endif /* POLYMORPHIC */
127 }
128
129 // CHECK-LABEL: @test_viwdupq_n_u32(
130 // CHECK-NEXT: entry:
131 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
132 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
133 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
134 //
test_viwdupq_n_u32(uint32_t a,uint32_t b)135 uint32x4_t test_viwdupq_n_u32(uint32_t a, uint32_t b)
136 {
137 #ifdef POLYMORPHIC
138 return viwdupq_u32(a, b, 8);
139 #else /* POLYMORPHIC */
140 return viwdupq_n_u32(a, b, 8);
141 #endif /* POLYMORPHIC */
142 }
143
144 // CHECK-LABEL: @test_vdwdupq_n_u8(
145 // CHECK-NEXT: entry:
146 // CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
147 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
148 // CHECK-NEXT: ret <16 x i8> [[TMP1]]
149 //
test_vdwdupq_n_u8(uint32_t a,uint32_t b)150 uint8x16_t test_vdwdupq_n_u8(uint32_t a, uint32_t b)
151 {
152 #ifdef POLYMORPHIC
153 return vdwdupq_u8(a, b, 4);
154 #else /* POLYMORPHIC */
155 return vdwdupq_n_u8(a, b, 4);
156 #endif /* POLYMORPHIC */
157 }
158
159 // CHECK-LABEL: @test_vdwdupq_n_u16(
160 // CHECK-NEXT: entry:
161 // CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
162 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
163 // CHECK-NEXT: ret <8 x i16> [[TMP1]]
164 //
test_vdwdupq_n_u16(uint32_t a,uint32_t b)165 uint16x8_t test_vdwdupq_n_u16(uint32_t a, uint32_t b)
166 {
167 #ifdef POLYMORPHIC
168 return vdwdupq_u16(a, b, 8);
169 #else /* POLYMORPHIC */
170 return vdwdupq_n_u16(a, b, 8);
171 #endif /* POLYMORPHIC */
172 }
173
174 // CHECK-LABEL: @test_vdwdupq_n_u32(
175 // CHECK-NEXT: entry:
176 // CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
177 // CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
178 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
179 //
test_vdwdupq_n_u32(uint32_t a,uint32_t b)180 uint32x4_t test_vdwdupq_n_u32(uint32_t a, uint32_t b)
181 {
182 #ifdef POLYMORPHIC
183 return vdwdupq_u32(a, b, 1);
184 #else /* POLYMORPHIC */
185 return vdwdupq_n_u32(a, b, 1);
186 #endif /* POLYMORPHIC */
187 }
188
189 // CHECK-LABEL: @test_vidupq_wb_u8(
190 // CHECK-NEXT: entry:
191 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
192 // CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[TMP0]], i32 8)
193 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
194 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
195 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
196 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
197 //
test_vidupq_wb_u8(uint32_t * a)198 uint8x16_t test_vidupq_wb_u8(uint32_t *a)
199 {
200 #ifdef POLYMORPHIC
201 return vidupq_u8(a, 8);
202 #else /* POLYMORPHIC */
203 return vidupq_wb_u8(a, 8);
204 #endif /* POLYMORPHIC */
205 }
206
207 // CHECK-LABEL: @test_vidupq_wb_u16(
208 // CHECK-NEXT: entry:
209 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
210 // CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[TMP0]], i32 1)
211 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
212 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
213 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
214 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
215 //
test_vidupq_wb_u16(uint32_t * a)216 uint16x8_t test_vidupq_wb_u16(uint32_t *a)
217 {
218 #ifdef POLYMORPHIC
219 return vidupq_u16(a, 1);
220 #else /* POLYMORPHIC */
221 return vidupq_wb_u16(a, 1);
222 #endif /* POLYMORPHIC */
223 }
224
225 // CHECK-LABEL: @test_vidupq_wb_u32(
226 // CHECK-NEXT: entry:
227 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
228 // CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[TMP0]], i32 4)
229 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
230 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
231 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
232 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
233 //
test_vidupq_wb_u32(uint32_t * a)234 uint32x4_t test_vidupq_wb_u32(uint32_t *a)
235 {
236 #ifdef POLYMORPHIC
237 return vidupq_u32(a, 4);
238 #else /* POLYMORPHIC */
239 return vidupq_wb_u32(a, 4);
240 #endif /* POLYMORPHIC */
241 }
242
243 // CHECK-LABEL: @test_vddupq_wb_u8(
244 // CHECK-NEXT: entry:
245 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
246 // CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[TMP0]], i32 2)
247 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
248 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
249 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
250 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
251 //
test_vddupq_wb_u8(uint32_t * a)252 uint8x16_t test_vddupq_wb_u8(uint32_t *a)
253 {
254 #ifdef POLYMORPHIC
255 return vddupq_u8(a, 2);
256 #else /* POLYMORPHIC */
257 return vddupq_wb_u8(a, 2);
258 #endif /* POLYMORPHIC */
259 }
260
261 // CHECK-LABEL: @test_vddupq_wb_u16(
262 // CHECK-NEXT: entry:
263 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
264 // CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[TMP0]], i32 8)
265 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
266 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
267 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
268 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
269 //
test_vddupq_wb_u16(uint32_t * a)270 uint16x8_t test_vddupq_wb_u16(uint32_t *a)
271 {
272 #ifdef POLYMORPHIC
273 return vddupq_u16(a, 8);
274 #else /* POLYMORPHIC */
275 return vddupq_wb_u16(a, 8);
276 #endif /* POLYMORPHIC */
277 }
278
279 // CHECK-LABEL: @test_vddupq_wb_u32(
280 // CHECK-NEXT: entry:
281 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
282 // CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[TMP0]], i32 2)
283 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
284 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
285 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
286 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
287 //
test_vddupq_wb_u32(uint32_t * a)288 uint32x4_t test_vddupq_wb_u32(uint32_t *a)
289 {
290 #ifdef POLYMORPHIC
291 return vddupq_u32(a, 2);
292 #else /* POLYMORPHIC */
293 return vddupq_wb_u32(a, 2);
294 #endif /* POLYMORPHIC */
295 }
296
297 // CHECK-LABEL: @test_vdwdupq_wb_u8(
298 // CHECK-NEXT: entry:
299 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
300 // CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
301 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
302 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
303 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
304 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
305 //
test_vdwdupq_wb_u8(uint32_t * a,uint32_t b)306 uint8x16_t test_vdwdupq_wb_u8(uint32_t *a, uint32_t b)
307 {
308 #ifdef POLYMORPHIC
309 return vdwdupq_u8(a, b, 4);
310 #else /* POLYMORPHIC */
311 return vdwdupq_wb_u8(a, b, 4);
312 #endif /* POLYMORPHIC */
313 }
314
315 // CHECK-LABEL: @test_vdwdupq_wb_u16(
316 // CHECK-NEXT: entry:
317 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
318 // CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
319 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
320 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
321 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
322 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
323 //
test_vdwdupq_wb_u16(uint32_t * a,uint32_t b)324 uint16x8_t test_vdwdupq_wb_u16(uint32_t *a, uint32_t b)
325 {
326 #ifdef POLYMORPHIC
327 return vdwdupq_u16(a, b, 4);
328 #else /* POLYMORPHIC */
329 return vdwdupq_wb_u16(a, b, 4);
330 #endif /* POLYMORPHIC */
331 }
332
333 // CHECK-LABEL: @test_viwdupq_wb_u8(
334 // CHECK-NEXT: entry:
335 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
336 // CHECK-NEXT: [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
337 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
338 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
339 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
340 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
341 //
test_viwdupq_wb_u8(uint32_t * a,uint32_t b)342 uint8x16_t test_viwdupq_wb_u8(uint32_t *a, uint32_t b)
343 {
344 #ifdef POLYMORPHIC
345 return viwdupq_u8(a, b, 1);
346 #else /* POLYMORPHIC */
347 return viwdupq_wb_u8(a, b, 1);
348 #endif /* POLYMORPHIC */
349 }
350
351 // CHECK-LABEL: @test_viwdupq_wb_u16(
352 // CHECK-NEXT: entry:
353 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
354 // CHECK-NEXT: [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
355 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
356 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
357 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
358 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
359 //
test_viwdupq_wb_u16(uint32_t * a,uint32_t b)360 uint16x8_t test_viwdupq_wb_u16(uint32_t *a, uint32_t b)
361 {
362 #ifdef POLYMORPHIC
363 return viwdupq_u16(a, b, 1);
364 #else /* POLYMORPHIC */
365 return viwdupq_wb_u16(a, b, 1);
366 #endif /* POLYMORPHIC */
367 }
368
369 // CHECK-LABEL: @test_viwdupq_wb_u32(
370 // CHECK-NEXT: entry:
371 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
372 // CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 8)
373 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
374 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
375 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
376 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
377 //
test_viwdupq_wb_u32(uint32_t * a,uint32_t b)378 uint32x4_t test_viwdupq_wb_u32(uint32_t *a, uint32_t b)
379 {
380 #ifdef POLYMORPHIC
381 return viwdupq_u32(a, b, 8);
382 #else /* POLYMORPHIC */
383 return viwdupq_wb_u32(a, b, 8);
384 #endif /* POLYMORPHIC */
385 }
386
387 // CHECK-LABEL: @test_vdwdupq_wb_u32(
388 // CHECK-NEXT: entry:
389 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
390 // CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 2)
391 // CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
392 // CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4
393 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
394 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
395 //
test_vdwdupq_wb_u32(uint32_t * a,uint32_t b)396 uint32x4_t test_vdwdupq_wb_u32(uint32_t *a, uint32_t b)
397 {
398 #ifdef POLYMORPHIC
399 return vdwdupq_u32(a, b, 2);
400 #else /* POLYMORPHIC */
401 return vdwdupq_wb_u32(a, b, 2);
402 #endif /* POLYMORPHIC */
403 }
404
405 // CHECK-LABEL: @test_vidupq_m_n_u8(
406 // CHECK-NEXT: entry:
407 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
408 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
409 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
410 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
411 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
412 //
test_vidupq_m_n_u8(uint8x16_t inactive,uint32_t a,mve_pred16_t p)413 uint8x16_t test_vidupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
414 {
415 #ifdef POLYMORPHIC
416 return vidupq_m(inactive, a, 8, p);
417 #else /* POLYMORPHIC */
418 return vidupq_m_n_u8(inactive, a, 8, p);
419 #endif /* POLYMORPHIC */
420 }
421
422 // CHECK-LABEL: @test_vidupq_m_n_u16(
423 // CHECK-NEXT: entry:
424 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
425 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
426 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <8 x i1> [[TMP1]])
427 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
428 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
429 //
test_vidupq_m_n_u16(uint16x8_t inactive,uint32_t a,mve_pred16_t p)430 uint16x8_t test_vidupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
431 {
432 #ifdef POLYMORPHIC
433 return vidupq_m(inactive, a, 8, p);
434 #else /* POLYMORPHIC */
435 return vidupq_m_n_u16(inactive, a, 8, p);
436 #endif /* POLYMORPHIC */
437 }
438
439 // CHECK-LABEL: @test_vidupq_m_n_u32(
440 // CHECK-NEXT: entry:
441 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
442 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
443 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
444 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
445 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
446 //
test_vidupq_m_n_u32(uint32x4_t inactive,uint32_t a,mve_pred16_t p)447 uint32x4_t test_vidupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
448 {
449 #ifdef POLYMORPHIC
450 return vidupq_m(inactive, a, 2, p);
451 #else /* POLYMORPHIC */
452 return vidupq_m_n_u32(inactive, a, 2, p);
453 #endif /* POLYMORPHIC */
454 }
455
456 // CHECK-LABEL: @test_vddupq_m_n_u8(
457 // CHECK-NEXT: entry:
458 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
459 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
460 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
461 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
462 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
463 //
test_vddupq_m_n_u8(uint8x16_t inactive,uint32_t a,mve_pred16_t p)464 uint8x16_t test_vddupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
465 {
466 #ifdef POLYMORPHIC
467 return vddupq_m(inactive, a, 8, p);
468 #else /* POLYMORPHIC */
469 return vddupq_m_n_u8(inactive, a, 8, p);
470 #endif /* POLYMORPHIC */
471 }
472
473 // CHECK-LABEL: @test_vddupq_m_n_u16(
474 // CHECK-NEXT: entry:
475 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
476 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
477 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
478 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
479 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
480 //
test_vddupq_m_n_u16(uint16x8_t inactive,uint32_t a,mve_pred16_t p)481 uint16x8_t test_vddupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
482 {
483 #ifdef POLYMORPHIC
484 return vddupq_m(inactive, a, 2, p);
485 #else /* POLYMORPHIC */
486 return vddupq_m_n_u16(inactive, a, 2, p);
487 #endif /* POLYMORPHIC */
488 }
489
490 // CHECK-LABEL: @test_vddupq_m_n_u32(
491 // CHECK-NEXT: entry:
492 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
493 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
494 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
495 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
496 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
497 //
test_vddupq_m_n_u32(uint32x4_t inactive,uint32_t a,mve_pred16_t p)498 uint32x4_t test_vddupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
499 {
500 #ifdef POLYMORPHIC
501 return vddupq_m(inactive, a, 8, p);
502 #else /* POLYMORPHIC */
503 return vddupq_m_n_u32(inactive, a, 8, p);
504 #endif /* POLYMORPHIC */
505 }
506
507 // CHECK-LABEL: @test_viwdupq_m_n_u8(
508 // CHECK-NEXT: entry:
509 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
510 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
511 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP1]])
512 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
513 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
514 //
test_viwdupq_m_n_u8(uint8x16_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)515 uint8x16_t test_viwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
516 {
517 #ifdef POLYMORPHIC
518 return viwdupq_m(inactive, a, b, 8, p);
519 #else /* POLYMORPHIC */
520 return viwdupq_m_n_u8(inactive, a, b, 8, p);
521 #endif /* POLYMORPHIC */
522 }
523
524 // CHECK-LABEL: @test_viwdupq_m_n_u16(
525 // CHECK-NEXT: entry:
526 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
527 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
528 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP1]])
529 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
530 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
531 //
test_viwdupq_m_n_u16(uint16x8_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)532 uint16x8_t test_viwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
533 {
534 #ifdef POLYMORPHIC
535 return viwdupq_m(inactive, a, b, 8, p);
536 #else /* POLYMORPHIC */
537 return viwdupq_m_n_u16(inactive, a, b, 8, p);
538 #endif /* POLYMORPHIC */
539 }
540
541 // CHECK-LABEL: @test_viwdupq_m_n_u32(
542 // CHECK-NEXT: entry:
543 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
544 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
545 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
546 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
547 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
548 //
test_viwdupq_m_n_u32(uint32x4_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)549 uint32x4_t test_viwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
550 {
551 #ifdef POLYMORPHIC
552 return viwdupq_m(inactive, a, b, 4, p);
553 #else /* POLYMORPHIC */
554 return viwdupq_m_n_u32(inactive, a, b, 4, p);
555 #endif /* POLYMORPHIC */
556 }
557
558 // CHECK-LABEL: @test_vdwdupq_m_n_u8(
559 // CHECK-NEXT: entry:
560 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
561 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
562 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP1]])
563 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
564 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
565 //
test_vdwdupq_m_n_u8(uint8x16_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)566 uint8x16_t test_vdwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
567 {
568 #ifdef POLYMORPHIC
569 return vdwdupq_m(inactive, a, b, 1, p);
570 #else /* POLYMORPHIC */
571 return vdwdupq_m_n_u8(inactive, a, b, 1, p);
572 #endif /* POLYMORPHIC */
573 }
574
575 // CHECK-LABEL: @test_vdwdupq_m_n_u16(
576 // CHECK-NEXT: entry:
577 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
578 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
579 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
580 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
581 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
582 //
test_vdwdupq_m_n_u16(uint16x8_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)583 uint16x8_t test_vdwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
584 {
585 #ifdef POLYMORPHIC
586 return vdwdupq_m(inactive, a, b, 2, p);
587 #else /* POLYMORPHIC */
588 return vdwdupq_m_n_u16(inactive, a, b, 2, p);
589 #endif /* POLYMORPHIC */
590 }
591
592 // CHECK-LABEL: @test_vdwdupq_m_n_u32(
593 // CHECK-NEXT: entry:
594 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
595 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
596 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
597 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
598 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
599 //
test_vdwdupq_m_n_u32(uint32x4_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)600 uint32x4_t test_vdwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
601 {
602 #ifdef POLYMORPHIC
603 return vdwdupq_m(inactive, a, b, 4, p);
604 #else /* POLYMORPHIC */
605 return vdwdupq_m_n_u32(inactive, a, b, 4, p);
606 #endif /* POLYMORPHIC */
607 }
608
609 // CHECK-LABEL: @test_vidupq_m_wb_u8(
610 // CHECK-NEXT: entry:
611 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
612 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
613 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
614 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <16 x i1> [[TMP2]])
615 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
616 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
617 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
618 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
619 //
test_vidupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,mve_pred16_t p)620 uint8x16_t test_vidupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
621 {
622 #ifdef POLYMORPHIC
623 return vidupq_m(inactive, a, 8, p);
624 #else /* POLYMORPHIC */
625 return vidupq_m_wb_u8(inactive, a, 8, p);
626 #endif /* POLYMORPHIC */
627 }
628
629 // CHECK-LABEL: @test_vidupq_m_wb_u16(
630 // CHECK-NEXT: entry:
631 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
632 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
633 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
634 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 2, <8 x i1> [[TMP2]])
635 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
636 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
637 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
638 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
639 //
test_vidupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,mve_pred16_t p)640 uint16x8_t test_vidupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
641 {
642 #ifdef POLYMORPHIC
643 return vidupq_m(inactive, a, 2, p);
644 #else /* POLYMORPHIC */
645 return vidupq_m_wb_u16(inactive, a, 2, p);
646 #endif /* POLYMORPHIC */
647 }
648
649 // CHECK-LABEL: @test_vidupq_m_wb_u32(
650 // CHECK-NEXT: entry:
651 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
652 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
653 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
654 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <4 x i1> [[TMP2]])
655 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
656 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
657 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
658 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
659 //
test_vidupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,mve_pred16_t p)660 uint32x4_t test_vidupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
661 {
662 #ifdef POLYMORPHIC
663 return vidupq_m(inactive, a, 8, p);
664 #else /* POLYMORPHIC */
665 return vidupq_m_wb_u32(inactive, a, 8, p);
666 #endif /* POLYMORPHIC */
667 }
668
669 // CHECK-LABEL: @test_vddupq_m_wb_u8(
670 // CHECK-NEXT: entry:
671 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
672 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
673 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
674 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
675 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
676 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
677 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
678 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
679 //
test_vddupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,mve_pred16_t p)680 uint8x16_t test_vddupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
681 {
682 #ifdef POLYMORPHIC
683 return vddupq_m(inactive, a, 1, p);
684 #else /* POLYMORPHIC */
685 return vddupq_m_wb_u8(inactive, a, 1, p);
686 #endif /* POLYMORPHIC */
687 }
688
689 // CHECK-LABEL: @test_vddupq_m_wb_u16(
690 // CHECK-NEXT: entry:
691 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
692 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
693 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
694 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <8 x i1> [[TMP2]])
695 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
696 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
697 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
698 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
699 //
test_vddupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,mve_pred16_t p)700 uint16x8_t test_vddupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
701 {
702 #ifdef POLYMORPHIC
703 return vddupq_m(inactive, a, 1, p);
704 #else /* POLYMORPHIC */
705 return vddupq_m_wb_u16(inactive, a, 1, p);
706 #endif /* POLYMORPHIC */
707 }
708
709 // CHECK-LABEL: @test_vddupq_m_wb_u32(
710 // CHECK-NEXT: entry:
711 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
712 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
713 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
714 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
715 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
716 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
717 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
718 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
719 //
test_vddupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,mve_pred16_t p)720 uint32x4_t test_vddupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
721 {
722 #ifdef POLYMORPHIC
723 return vddupq_m(inactive, a, 4, p);
724 #else /* POLYMORPHIC */
725 return vddupq_m_wb_u32(inactive, a, 4, p);
726 #endif /* POLYMORPHIC */
727 }
728
729 // CHECK-LABEL: @test_viwdupq_m_wb_u8(
730 // CHECK-NEXT: entry:
731 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
732 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
733 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
734 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP2]])
735 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
736 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
737 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
738 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
739 //
test_viwdupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)740 uint8x16_t test_viwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
741 {
742 #ifdef POLYMORPHIC
743 return viwdupq_m(inactive, a, b, 8, p);
744 #else /* POLYMORPHIC */
745 return viwdupq_m_wb_u8(inactive, a, b, 8, p);
746 #endif /* POLYMORPHIC */
747 }
748
749 // CHECK-LABEL: @test_viwdupq_m_wb_u16(
750 // CHECK-NEXT: entry:
751 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
752 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
753 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
754 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP2]])
755 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
756 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
757 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
758 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
759 //
test_viwdupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)760 uint16x8_t test_viwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
761 {
762 #ifdef POLYMORPHIC
763 return viwdupq_m(inactive, a, b, 8, p);
764 #else /* POLYMORPHIC */
765 return viwdupq_m_wb_u16(inactive, a, b, 8, p);
766 #endif /* POLYMORPHIC */
767 }
768
769 // CHECK-LABEL: @test_viwdupq_m_wb_u32(
770 // CHECK-NEXT: entry:
771 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
772 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
773 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
774 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
775 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
776 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
777 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
778 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
779 //
test_viwdupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)780 uint32x4_t test_viwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
781 {
782 #ifdef POLYMORPHIC
783 return viwdupq_m(inactive, a, b, 4, p);
784 #else /* POLYMORPHIC */
785 return viwdupq_m_wb_u32(inactive, a, b, 4, p);
786 #endif /* POLYMORPHIC */
787 }
788
789 // CHECK-LABEL: @test_vdwdupq_m_wb_u8(
790 // CHECK-NEXT: entry:
791 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
792 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
793 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
794 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
795 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
796 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
797 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
798 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
799 //
test_vdwdupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)800 uint8x16_t test_vdwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
801 {
802 #ifdef POLYMORPHIC
803 return vdwdupq_m(inactive, a, b, 1, p);
804 #else /* POLYMORPHIC */
805 return vdwdupq_m_wb_u8(inactive, a, b, 1, p);
806 #endif /* POLYMORPHIC */
807 }
808
809 // CHECK-LABEL: @test_vdwdupq_m_wb_u16(
810 // CHECK-NEXT: entry:
811 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
812 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
813 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
814 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
815 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
816 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
817 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
818 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
819 //
test_vdwdupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)820 uint16x8_t test_vdwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
821 {
822 #ifdef POLYMORPHIC
823 return vdwdupq_m(inactive, a, b, 4, p);
824 #else /* POLYMORPHIC */
825 return vdwdupq_m_wb_u16(inactive, a, b, 4, p);
826 #endif /* POLYMORPHIC */
827 }
828
829 // CHECK-LABEL: @test_vdwdupq_m_wb_u32(
830 // CHECK-NEXT: entry:
831 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
832 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
833 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
834 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
835 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
836 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
837 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
838 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
839 //
test_vdwdupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)840 uint32x4_t test_vdwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
841 {
842 #ifdef POLYMORPHIC
843 return vdwdupq_m(inactive, a, b, 4, p);
844 #else /* POLYMORPHIC */
845 return vdwdupq_m_wb_u32(inactive, a, b, 4, p);
846 #endif /* POLYMORPHIC */
847 }
848
849 // CHECK-LABEL: @test_vidupq_x_n_u8(
850 // CHECK-NEXT: entry:
851 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
852 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
853 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 2, <16 x i1> [[TMP1]])
854 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
855 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
856 //
test_vidupq_x_n_u8(uint32_t a,mve_pred16_t p)857 uint8x16_t test_vidupq_x_n_u8(uint32_t a, mve_pred16_t p)
858 {
859 #ifdef POLYMORPHIC
860 return vidupq_x_u8(a, 2, p);
861 #else /* POLYMORPHIC */
862 return vidupq_x_n_u8(a, 2, p);
863 #endif /* POLYMORPHIC */
864 }
865
866 // CHECK-LABEL: @test_vidupq_x_n_u16(
867 // CHECK-NEXT: entry:
868 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
869 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
870 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
871 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
872 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
873 //
test_vidupq_x_n_u16(uint32_t a,mve_pred16_t p)874 uint16x8_t test_vidupq_x_n_u16(uint32_t a, mve_pred16_t p)
875 {
876 #ifdef POLYMORPHIC
877 return vidupq_x_u16(a, 2, p);
878 #else /* POLYMORPHIC */
879 return vidupq_x_n_u16(a, 2, p);
880 #endif /* POLYMORPHIC */
881 }
882
883 // CHECK-LABEL: @test_vidupq_x_n_u32(
884 // CHECK-NEXT: entry:
885 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
886 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
887 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
888 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
889 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
890 //
test_vidupq_x_n_u32(uint32_t a,mve_pred16_t p)891 uint32x4_t test_vidupq_x_n_u32(uint32_t a, mve_pred16_t p)
892 {
893 #ifdef POLYMORPHIC
894 return vidupq_x_u32(a, 8, p);
895 #else /* POLYMORPHIC */
896 return vidupq_x_n_u32(a, 8, p);
897 #endif /* POLYMORPHIC */
898 }
899
900 // CHECK-LABEL: @test_vddupq_x_n_u8(
901 // CHECK-NEXT: entry:
902 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
903 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
904 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
905 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
906 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
907 //
test_vddupq_x_n_u8(uint32_t a,mve_pred16_t p)908 uint8x16_t test_vddupq_x_n_u8(uint32_t a, mve_pred16_t p)
909 {
910 #ifdef POLYMORPHIC
911 return vddupq_x_u8(a, 8, p);
912 #else /* POLYMORPHIC */
913 return vddupq_x_n_u8(a, 8, p);
914 #endif /* POLYMORPHIC */
915 }
916
917 // CHECK-LABEL: @test_vddupq_x_n_u16(
918 // CHECK-NEXT: entry:
919 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
920 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
921 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 4, <8 x i1> [[TMP1]])
922 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
923 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
924 //
test_vddupq_x_n_u16(uint32_t a,mve_pred16_t p)925 uint16x8_t test_vddupq_x_n_u16(uint32_t a, mve_pred16_t p)
926 {
927 #ifdef POLYMORPHIC
928 return vddupq_x_u16(a, 4, p);
929 #else /* POLYMORPHIC */
930 return vddupq_x_n_u16(a, 4, p);
931 #endif /* POLYMORPHIC */
932 }
933
934 // CHECK-LABEL: @test_vddupq_x_n_u32(
935 // CHECK-NEXT: entry:
936 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
937 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
938 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
939 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
940 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
941 //
test_vddupq_x_n_u32(uint32_t a,mve_pred16_t p)942 uint32x4_t test_vddupq_x_n_u32(uint32_t a, mve_pred16_t p)
943 {
944 #ifdef POLYMORPHIC
945 return vddupq_x_u32(a, 2, p);
946 #else /* POLYMORPHIC */
947 return vddupq_x_n_u32(a, 2, p);
948 #endif /* POLYMORPHIC */
949 }
950
951 // CHECK-LABEL: @test_viwdupq_x_n_u8(
952 // CHECK-NEXT: entry:
953 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
954 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
955 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
956 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
957 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
958 //
test_viwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)959 uint8x16_t test_viwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
960 {
961 #ifdef POLYMORPHIC
962 return viwdupq_x_u8(a, b, 2, p);
963 #else /* POLYMORPHIC */
964 return viwdupq_x_n_u8(a, b, 2, p);
965 #endif /* POLYMORPHIC */
966 }
967
968 // CHECK-LABEL: @test_viwdupq_x_n_u16(
969 // CHECK-NEXT: entry:
970 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
971 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
972 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP1]])
973 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
974 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
975 //
test_viwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)976 uint16x8_t test_viwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
977 {
978 #ifdef POLYMORPHIC
979 return viwdupq_x_u16(a, b, 4, p);
980 #else /* POLYMORPHIC */
981 return viwdupq_x_n_u16(a, b, 4, p);
982 #endif /* POLYMORPHIC */
983 }
984
985 // CHECK-LABEL: @test_viwdupq_x_n_u32(
986 // CHECK-NEXT: entry:
987 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
988 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
989 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <4 x i1> [[TMP1]])
990 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
991 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
992 //
test_viwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)993 uint32x4_t test_viwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
994 {
995 #ifdef POLYMORPHIC
996 return viwdupq_x_u32(a, b, 2, p);
997 #else /* POLYMORPHIC */
998 return viwdupq_x_n_u32(a, b, 2, p);
999 #endif /* POLYMORPHIC */
1000 }
1001
1002 // CHECK-LABEL: @test_vdwdupq_x_n_u8(
1003 // CHECK-NEXT: entry:
1004 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1005 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
1006 // CHECK-NEXT: [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
1007 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
1008 // CHECK-NEXT: ret <16 x i8> [[TMP3]]
1009 //
test_vdwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)1010 uint8x16_t test_vdwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
1011 {
1012 #ifdef POLYMORPHIC
1013 return vdwdupq_x_u8(a, b, 2, p);
1014 #else /* POLYMORPHIC */
1015 return vdwdupq_x_n_u8(a, b, 2, p);
1016 #endif /* POLYMORPHIC */
1017 }
1018
1019 // CHECK-LABEL: @test_vdwdupq_x_n_u16(
1020 // CHECK-NEXT: entry:
1021 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1022 // CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1023 // CHECK-NEXT: [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
1024 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
1025 // CHECK-NEXT: ret <8 x i16> [[TMP3]]
1026 //
test_vdwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)1027 uint16x8_t test_vdwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
1028 {
1029 #ifdef POLYMORPHIC
1030 return vdwdupq_x_u16(a, b, 2, p);
1031 #else /* POLYMORPHIC */
1032 return vdwdupq_x_n_u16(a, b, 2, p);
1033 #endif /* POLYMORPHIC */
1034 }
1035
1036 // CHECK-LABEL: @test_vdwdupq_x_n_u32(
1037 // CHECK-NEXT: entry:
1038 // CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1039 // CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1040 // CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <4 x i1> [[TMP1]])
1041 // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
1042 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
1043 //
test_vdwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)1044 uint32x4_t test_vdwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
1045 {
1046 #ifdef POLYMORPHIC
1047 return vdwdupq_x_u32(a, b, 8, p);
1048 #else /* POLYMORPHIC */
1049 return vdwdupq_x_n_u32(a, b, 8, p);
1050 #endif /* POLYMORPHIC */
1051 }
1052
1053 // CHECK-LABEL: @test_vidupq_x_wb_u8(
1054 // CHECK-NEXT: entry:
1055 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1056 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1057 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1058 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 2, <16 x i1> [[TMP2]])
1059 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1060 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1061 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1062 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
1063 //
test_vidupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1064 uint8x16_t test_vidupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1065 {
1066 #ifdef POLYMORPHIC
1067 return vidupq_x_u8(a, 2, p);
1068 #else /* POLYMORPHIC */
1069 return vidupq_x_wb_u8(a, 2, p);
1070 #endif /* POLYMORPHIC */
1071 }
1072
1073 // CHECK-LABEL: @test_vidupq_x_wb_u16(
1074 // CHECK-NEXT: entry:
1075 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1076 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1077 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1078 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1079 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1080 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1081 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1082 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
1083 //
test_vidupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1084 uint16x8_t test_vidupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1085 {
1086 #ifdef POLYMORPHIC
1087 return vidupq_x_u16(a, 4, p);
1088 #else /* POLYMORPHIC */
1089 return vidupq_x_wb_u16(a, 4, p);
1090 #endif /* POLYMORPHIC */
1091 }
1092
1093 // CHECK-LABEL: @test_vidupq_x_wb_u32(
1094 // CHECK-NEXT: entry:
1095 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1096 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1097 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1098 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 2, <4 x i1> [[TMP2]])
1099 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1100 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1101 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1102 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
1103 //
test_vidupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1104 uint32x4_t test_vidupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1105 {
1106 #ifdef POLYMORPHIC
1107 return vidupq_x_u32(a, 2, p);
1108 #else /* POLYMORPHIC */
1109 return vidupq_x_wb_u32(a, 2, p);
1110 #endif /* POLYMORPHIC */
1111 }
1112
1113 // CHECK-LABEL: @test_vddupq_x_wb_u8(
1114 // CHECK-NEXT: entry:
1115 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1116 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1117 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1118 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
1119 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1120 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1121 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1122 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
1123 //
test_vddupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1124 uint8x16_t test_vddupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1125 {
1126 #ifdef POLYMORPHIC
1127 return vddupq_x_u8(a, 1, p);
1128 #else /* POLYMORPHIC */
1129 return vddupq_x_wb_u8(a, 1, p);
1130 #endif /* POLYMORPHIC */
1131 }
1132
1133 // CHECK-LABEL: @test_vddupq_x_wb_u16(
1134 // CHECK-NEXT: entry:
1135 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1136 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1137 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1138 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1139 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1140 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1141 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1142 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
1143 //
test_vddupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1144 uint16x8_t test_vddupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1145 {
1146 #ifdef POLYMORPHIC
1147 return vddupq_x_u16(a, 4, p);
1148 #else /* POLYMORPHIC */
1149 return vddupq_x_wb_u16(a, 4, p);
1150 #endif /* POLYMORPHIC */
1151 }
1152
1153 // CHECK-LABEL: @test_vddupq_x_wb_u32(
1154 // CHECK-NEXT: entry:
1155 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1156 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1157 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1158 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1159 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1160 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1161 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1162 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
1163 //
test_vddupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1164 uint32x4_t test_vddupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1165 {
1166 #ifdef POLYMORPHIC
1167 return vddupq_x_u32(a, 4, p);
1168 #else /* POLYMORPHIC */
1169 return vddupq_x_wb_u32(a, 4, p);
1170 #endif /* POLYMORPHIC */
1171 }
1172
1173 // CHECK-LABEL: @test_viwdupq_x_wb_u8(
1174 // CHECK-NEXT: entry:
1175 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1176 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1177 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1178 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
1179 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1180 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1181 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1182 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
1183 //
test_viwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1184 uint8x16_t test_viwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1185 {
1186 #ifdef POLYMORPHIC
1187 return viwdupq_x_u8(a, b, 1, p);
1188 #else /* POLYMORPHIC */
1189 return viwdupq_x_wb_u8(a, b, 1, p);
1190 #endif /* POLYMORPHIC */
1191 }
1192
1193 // CHECK-LABEL: @test_viwdupq_x_wb_u16(
1194 // CHECK-NEXT: entry:
1195 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1196 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1197 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1198 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP2]])
1199 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1200 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1201 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1202 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
1203 //
test_viwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1204 uint16x8_t test_viwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1205 {
1206 #ifdef POLYMORPHIC
1207 return viwdupq_x_u16(a, b, 2, p);
1208 #else /* POLYMORPHIC */
1209 return viwdupq_x_wb_u16(a, b, 2, p);
1210 #endif /* POLYMORPHIC */
1211 }
1212
1213 // CHECK-LABEL: @test_viwdupq_x_wb_u32(
1214 // CHECK-NEXT: entry:
1215 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1216 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1217 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1218 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <4 x i1> [[TMP2]])
1219 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1220 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1221 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1222 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
1223 //
test_viwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1224 uint32x4_t test_viwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1225 {
1226 #ifdef POLYMORPHIC
1227 return viwdupq_x_u32(a, b, 1, p);
1228 #else /* POLYMORPHIC */
1229 return viwdupq_x_wb_u32(a, b, 1, p);
1230 #endif /* POLYMORPHIC */
1231 }
1232
1233 // CHECK-LABEL: @test_vdwdupq_x_wb_u8(
1234 // CHECK-NEXT: entry:
1235 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1236 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1237 // CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1238 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <16 x i1> [[TMP2]])
1239 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1240 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1241 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1242 // CHECK-NEXT: ret <16 x i8> [[TMP5]]
1243 //
test_vdwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1244 uint8x16_t test_vdwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1245 {
1246 #ifdef POLYMORPHIC
1247 return vdwdupq_x_u8(a, b, 4, p);
1248 #else /* POLYMORPHIC */
1249 return vdwdupq_x_wb_u8(a, b, 4, p);
1250 #endif /* POLYMORPHIC */
1251 }
1252
1253 // CHECK-LABEL: @test_vdwdupq_x_wb_u16(
1254 // CHECK-NEXT: entry:
1255 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1256 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1257 // CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1258 // CHECK-NEXT: [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
1259 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1260 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1261 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1262 // CHECK-NEXT: ret <8 x i16> [[TMP5]]
1263 //
test_vdwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1264 uint16x8_t test_vdwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1265 {
1266 #ifdef POLYMORPHIC
1267 return vdwdupq_x_u16(a, b, 4, p);
1268 #else /* POLYMORPHIC */
1269 return vdwdupq_x_wb_u16(a, b, 4, p);
1270 #endif /* POLYMORPHIC */
1271 }
1272
1273 // CHECK-LABEL: @test_vdwdupq_x_wb_u32(
1274 // CHECK-NEXT: entry:
1275 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1276 // CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1277 // CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1278 // CHECK-NEXT: [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
1279 // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1280 // CHECK-NEXT: store i32 [[TMP4]], ptr [[A]], align 4
1281 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1282 // CHECK-NEXT: ret <4 x i32> [[TMP5]]
1283 //
test_vdwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1284 uint32x4_t test_vdwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1285 {
1286 #ifdef POLYMORPHIC
1287 return vdwdupq_x_u32(a, b, 4, p);
1288 #else /* POLYMORPHIC */
1289 return vdwdupq_x_wb_u32(a, b, 4, p);
1290 #endif /* POLYMORPHIC */
1291 }
1292