xref: /llvm-project/clang/test/CodeGen/arm-mve-intrinsics/idup.c (revision c5de4dd1eab00df76c1a68c5f397304ceacb71f2)
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
4 
5 // REQUIRES: aarch64-registered-target || arm-registered-target
6 
7 #include <arm_mve.h>
8 
9 // CHECK-LABEL: @test_vidupq_n_u8(
10 // CHECK-NEXT:  entry:
11 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[A:%.*]], i32 4)
12 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
13 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
14 //
test_vidupq_n_u8(uint32_t a)15 uint8x16_t test_vidupq_n_u8(uint32_t a)
16 {
17 #ifdef POLYMORPHIC
18     return vidupq_u8(a, 4);
19 #else /* POLYMORPHIC */
20     return vidupq_n_u8(a, 4);
21 #endif /* POLYMORPHIC */
22 }
23 
24 // CHECK-LABEL: @test_vidupq_n_u16(
25 // CHECK-NEXT:  entry:
26 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[A:%.*]], i32 1)
27 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
28 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
29 //
test_vidupq_n_u16(uint32_t a)30 uint16x8_t test_vidupq_n_u16(uint32_t a)
31 {
32 #ifdef POLYMORPHIC
33     return vidupq_u16(a, 1);
34 #else /* POLYMORPHIC */
35     return vidupq_n_u16(a, 1);
36 #endif /* POLYMORPHIC */
37 }
38 
39 // CHECK-LABEL: @test_vidupq_n_u32(
40 // CHECK-NEXT:  entry:
41 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[A:%.*]], i32 4)
42 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
43 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
44 //
test_vidupq_n_u32(uint32_t a)45 uint32x4_t test_vidupq_n_u32(uint32_t a)
46 {
47 #ifdef POLYMORPHIC
48     return vidupq_u32(a, 4);
49 #else /* POLYMORPHIC */
50     return vidupq_n_u32(a, 4);
51 #endif /* POLYMORPHIC */
52 }
53 
54 // CHECK-LABEL: @test_vddupq_n_u8(
55 // CHECK-NEXT:  entry:
56 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[A:%.*]], i32 2)
57 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
58 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
59 //
test_vddupq_n_u8(uint32_t a)60 uint8x16_t test_vddupq_n_u8(uint32_t a)
61 {
62 #ifdef POLYMORPHIC
63     return vddupq_u8(a, 2);
64 #else /* POLYMORPHIC */
65     return vddupq_n_u8(a, 2);
66 #endif /* POLYMORPHIC */
67 }
68 
69 // CHECK-LABEL: @test_vddupq_n_u16(
70 // CHECK-NEXT:  entry:
71 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[A:%.*]], i32 4)
72 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
73 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
74 //
test_vddupq_n_u16(uint32_t a)75 uint16x8_t test_vddupq_n_u16(uint32_t a)
76 {
77 #ifdef POLYMORPHIC
78     return vddupq_u16(a, 4);
79 #else /* POLYMORPHIC */
80     return vddupq_n_u16(a, 4);
81 #endif /* POLYMORPHIC */
82 }
83 
84 // CHECK-LABEL: @test_vddupq_n_u32(
85 // CHECK-NEXT:  entry:
86 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[A:%.*]], i32 2)
87 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
88 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
89 //
test_vddupq_n_u32(uint32_t a)90 uint32x4_t test_vddupq_n_u32(uint32_t a)
91 {
92 #ifdef POLYMORPHIC
93     return vddupq_u32(a, 2);
94 #else /* POLYMORPHIC */
95     return vddupq_n_u32(a, 2);
96 #endif /* POLYMORPHIC */
97 }
98 
99 // CHECK-LABEL: @test_viwdupq_n_u8(
100 // CHECK-NEXT:  entry:
101 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
102 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
103 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
104 //
test_viwdupq_n_u8(uint32_t a,uint32_t b)105 uint8x16_t test_viwdupq_n_u8(uint32_t a, uint32_t b)
106 {
107 #ifdef POLYMORPHIC
108     return viwdupq_u8(a, b, 4);
109 #else /* POLYMORPHIC */
110     return viwdupq_n_u8(a, b, 4);
111 #endif /* POLYMORPHIC */
112 }
113 
114 // CHECK-LABEL: @test_viwdupq_n_u16(
115 // CHECK-NEXT:  entry:
116 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 2)
117 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
118 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
119 //
test_viwdupq_n_u16(uint32_t a,uint32_t b)120 uint16x8_t test_viwdupq_n_u16(uint32_t a, uint32_t b)
121 {
122 #ifdef POLYMORPHIC
123     return viwdupq_u16(a, b, 2);
124 #else /* POLYMORPHIC */
125     return viwdupq_n_u16(a, b, 2);
126 #endif /* POLYMORPHIC */
127 }
128 
129 // CHECK-LABEL: @test_viwdupq_n_u32(
130 // CHECK-NEXT:  entry:
131 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
132 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
133 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
134 //
test_viwdupq_n_u32(uint32_t a,uint32_t b)135 uint32x4_t test_viwdupq_n_u32(uint32_t a, uint32_t b)
136 {
137 #ifdef POLYMORPHIC
138     return viwdupq_u32(a, b, 8);
139 #else /* POLYMORPHIC */
140     return viwdupq_n_u32(a, b, 8);
141 #endif /* POLYMORPHIC */
142 }
143 
144 // CHECK-LABEL: @test_vdwdupq_n_u8(
145 // CHECK-NEXT:  entry:
146 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
147 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
148 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
149 //
test_vdwdupq_n_u8(uint32_t a,uint32_t b)150 uint8x16_t test_vdwdupq_n_u8(uint32_t a, uint32_t b)
151 {
152 #ifdef POLYMORPHIC
153     return vdwdupq_u8(a, b, 4);
154 #else /* POLYMORPHIC */
155     return vdwdupq_n_u8(a, b, 4);
156 #endif /* POLYMORPHIC */
157 }
158 
159 // CHECK-LABEL: @test_vdwdupq_n_u16(
160 // CHECK-NEXT:  entry:
161 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
162 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
163 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
164 //
test_vdwdupq_n_u16(uint32_t a,uint32_t b)165 uint16x8_t test_vdwdupq_n_u16(uint32_t a, uint32_t b)
166 {
167 #ifdef POLYMORPHIC
168     return vdwdupq_u16(a, b, 8);
169 #else /* POLYMORPHIC */
170     return vdwdupq_n_u16(a, b, 8);
171 #endif /* POLYMORPHIC */
172 }
173 
174 // CHECK-LABEL: @test_vdwdupq_n_u32(
175 // CHECK-NEXT:  entry:
176 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
177 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
178 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
179 //
test_vdwdupq_n_u32(uint32_t a,uint32_t b)180 uint32x4_t test_vdwdupq_n_u32(uint32_t a, uint32_t b)
181 {
182 #ifdef POLYMORPHIC
183     return vdwdupq_u32(a, b, 1);
184 #else /* POLYMORPHIC */
185     return vdwdupq_n_u32(a, b, 1);
186 #endif /* POLYMORPHIC */
187 }
188 
189 // CHECK-LABEL: @test_vidupq_wb_u8(
190 // CHECK-NEXT:  entry:
191 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
192 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[TMP0]], i32 8)
193 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
194 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
195 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
196 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
197 //
test_vidupq_wb_u8(uint32_t * a)198 uint8x16_t test_vidupq_wb_u8(uint32_t *a)
199 {
200 #ifdef POLYMORPHIC
201     return vidupq_u8(a, 8);
202 #else /* POLYMORPHIC */
203     return vidupq_wb_u8(a, 8);
204 #endif /* POLYMORPHIC */
205 }
206 
207 // CHECK-LABEL: @test_vidupq_wb_u16(
208 // CHECK-NEXT:  entry:
209 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
210 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[TMP0]], i32 1)
211 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
212 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
213 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
214 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
215 //
test_vidupq_wb_u16(uint32_t * a)216 uint16x8_t test_vidupq_wb_u16(uint32_t *a)
217 {
218 #ifdef POLYMORPHIC
219     return vidupq_u16(a, 1);
220 #else /* POLYMORPHIC */
221     return vidupq_wb_u16(a, 1);
222 #endif /* POLYMORPHIC */
223 }
224 
225 // CHECK-LABEL: @test_vidupq_wb_u32(
226 // CHECK-NEXT:  entry:
227 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
228 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[TMP0]], i32 4)
229 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
230 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
231 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
232 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
233 //
test_vidupq_wb_u32(uint32_t * a)234 uint32x4_t test_vidupq_wb_u32(uint32_t *a)
235 {
236 #ifdef POLYMORPHIC
237     return vidupq_u32(a, 4);
238 #else /* POLYMORPHIC */
239     return vidupq_wb_u32(a, 4);
240 #endif /* POLYMORPHIC */
241 }
242 
243 // CHECK-LABEL: @test_vddupq_wb_u8(
244 // CHECK-NEXT:  entry:
245 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
246 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[TMP0]], i32 2)
247 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
248 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
249 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
250 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
251 //
test_vddupq_wb_u8(uint32_t * a)252 uint8x16_t test_vddupq_wb_u8(uint32_t *a)
253 {
254 #ifdef POLYMORPHIC
255     return vddupq_u8(a, 2);
256 #else /* POLYMORPHIC */
257     return vddupq_wb_u8(a, 2);
258 #endif /* POLYMORPHIC */
259 }
260 
261 // CHECK-LABEL: @test_vddupq_wb_u16(
262 // CHECK-NEXT:  entry:
263 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
264 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[TMP0]], i32 8)
265 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
266 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
267 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
268 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
269 //
test_vddupq_wb_u16(uint32_t * a)270 uint16x8_t test_vddupq_wb_u16(uint32_t *a)
271 {
272 #ifdef POLYMORPHIC
273     return vddupq_u16(a, 8);
274 #else /* POLYMORPHIC */
275     return vddupq_wb_u16(a, 8);
276 #endif /* POLYMORPHIC */
277 }
278 
279 // CHECK-LABEL: @test_vddupq_wb_u32(
280 // CHECK-NEXT:  entry:
281 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
282 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[TMP0]], i32 2)
283 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
284 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
285 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
286 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
287 //
test_vddupq_wb_u32(uint32_t * a)288 uint32x4_t test_vddupq_wb_u32(uint32_t *a)
289 {
290 #ifdef POLYMORPHIC
291     return vddupq_u32(a, 2);
292 #else /* POLYMORPHIC */
293     return vddupq_wb_u32(a, 2);
294 #endif /* POLYMORPHIC */
295 }
296 
297 // CHECK-LABEL: @test_vdwdupq_wb_u8(
298 // CHECK-NEXT:  entry:
299 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
300 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
301 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
302 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
303 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
304 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
305 //
test_vdwdupq_wb_u8(uint32_t * a,uint32_t b)306 uint8x16_t test_vdwdupq_wb_u8(uint32_t *a, uint32_t b)
307 {
308 #ifdef POLYMORPHIC
309     return vdwdupq_u8(a, b, 4);
310 #else /* POLYMORPHIC */
311     return vdwdupq_wb_u8(a, b, 4);
312 #endif /* POLYMORPHIC */
313 }
314 
315 // CHECK-LABEL: @test_vdwdupq_wb_u16(
316 // CHECK-NEXT:  entry:
317 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
318 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
319 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
320 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
321 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
322 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
323 //
test_vdwdupq_wb_u16(uint32_t * a,uint32_t b)324 uint16x8_t test_vdwdupq_wb_u16(uint32_t *a, uint32_t b)
325 {
326 #ifdef POLYMORPHIC
327     return vdwdupq_u16(a, b, 4);
328 #else /* POLYMORPHIC */
329     return vdwdupq_wb_u16(a, b, 4);
330 #endif /* POLYMORPHIC */
331 }
332 
333 // CHECK-LABEL: @test_viwdupq_wb_u8(
334 // CHECK-NEXT:  entry:
335 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
336 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
337 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
338 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
339 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
340 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
341 //
test_viwdupq_wb_u8(uint32_t * a,uint32_t b)342 uint8x16_t test_viwdupq_wb_u8(uint32_t *a, uint32_t b)
343 {
344 #ifdef POLYMORPHIC
345     return viwdupq_u8(a, b, 1);
346 #else /* POLYMORPHIC */
347     return viwdupq_wb_u8(a, b, 1);
348 #endif /* POLYMORPHIC */
349 }
350 
351 // CHECK-LABEL: @test_viwdupq_wb_u16(
352 // CHECK-NEXT:  entry:
353 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
354 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
355 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
356 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
357 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
358 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
359 //
test_viwdupq_wb_u16(uint32_t * a,uint32_t b)360 uint16x8_t test_viwdupq_wb_u16(uint32_t *a, uint32_t b)
361 {
362 #ifdef POLYMORPHIC
363     return viwdupq_u16(a, b, 1);
364 #else /* POLYMORPHIC */
365     return viwdupq_wb_u16(a, b, 1);
366 #endif /* POLYMORPHIC */
367 }
368 
369 // CHECK-LABEL: @test_viwdupq_wb_u32(
370 // CHECK-NEXT:  entry:
371 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
372 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 8)
373 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
374 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
375 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
376 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
377 //
test_viwdupq_wb_u32(uint32_t * a,uint32_t b)378 uint32x4_t test_viwdupq_wb_u32(uint32_t *a, uint32_t b)
379 {
380 #ifdef POLYMORPHIC
381     return viwdupq_u32(a, b, 8);
382 #else /* POLYMORPHIC */
383     return viwdupq_wb_u32(a, b, 8);
384 #endif /* POLYMORPHIC */
385 }
386 
387 // CHECK-LABEL: @test_vdwdupq_wb_u32(
388 // CHECK-NEXT:  entry:
389 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
390 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 2)
391 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
392 // CHECK-NEXT:    store i32 [[TMP2]], ptr [[A]], align 4
393 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
394 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
395 //
test_vdwdupq_wb_u32(uint32_t * a,uint32_t b)396 uint32x4_t test_vdwdupq_wb_u32(uint32_t *a, uint32_t b)
397 {
398 #ifdef POLYMORPHIC
399     return vdwdupq_u32(a, b, 2);
400 #else /* POLYMORPHIC */
401     return vdwdupq_wb_u32(a, b, 2);
402 #endif /* POLYMORPHIC */
403 }
404 
405 // CHECK-LABEL: @test_vidupq_m_n_u8(
406 // CHECK-NEXT:  entry:
407 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
408 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
409 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
410 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
411 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
412 //
/* Codegen test: vidupq_m_n_u8(inactive, a, 8, p), or the overloaded vidupq_m
 * spelling when POLYMORPHIC is defined. */
uint8x16_t test_vidupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
421 
422 // CHECK-LABEL: @test_vidupq_m_n_u16(
423 // CHECK-NEXT:  entry:
424 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
425 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
426 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <8 x i1> [[TMP1]])
427 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
428 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
429 //
/* Codegen test: vidupq_m_n_u16(inactive, a, 8, p), or the overloaded vidupq_m
 * spelling when POLYMORPHIC is defined. */
uint16x8_t test_vidupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u16(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
438 
439 // CHECK-LABEL: @test_vidupq_m_n_u32(
440 // CHECK-NEXT:  entry:
441 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
442 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
443 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
444 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
445 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
446 //
/* Codegen test: vidupq_m_n_u32(inactive, a, 2, p), or the overloaded vidupq_m
 * spelling when POLYMORPHIC is defined. */
uint32x4_t test_vidupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_m_n_u32(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}
455 
456 // CHECK-LABEL: @test_vddupq_m_n_u8(
457 // CHECK-NEXT:  entry:
458 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
459 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
460 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
461 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
462 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
463 //
/* Codegen test: vddupq_m_n_u8(inactive, a, 8, p), or the overloaded vddupq_m
 * spelling when POLYMORPHIC is defined. */
uint8x16_t test_vddupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
472 
473 // CHECK-LABEL: @test_vddupq_m_n_u16(
474 // CHECK-NEXT:  entry:
475 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
476 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
477 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
478 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
479 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
480 //
/* Codegen test: vddupq_m_n_u16(inactive, a, 2, p), or the overloaded vddupq_m
 * spelling when POLYMORPHIC is defined. */
uint16x8_t test_vddupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u16(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}
489 
490 // CHECK-LABEL: @test_vddupq_m_n_u32(
491 // CHECK-NEXT:  entry:
492 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
493 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
494 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
495 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
496 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
497 //
/* Codegen test: vddupq_m_n_u32(inactive, a, 8, p), or the overloaded vddupq_m
 * spelling when POLYMORPHIC is defined. */
uint32x4_t test_vddupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vddupq_m_n_u32(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
506 
507 // CHECK-LABEL: @test_viwdupq_m_n_u8(
508 // CHECK-NEXT:  entry:
509 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
510 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
511 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP1]])
512 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
513 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
514 //
/* Codegen test: viwdupq_m_n_u8(inactive, a, b, 8, p), or the overloaded
 * viwdupq_m spelling when POLYMORPHIC is defined. */
uint8x16_t test_viwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u8(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}
523 
524 // CHECK-LABEL: @test_viwdupq_m_n_u16(
525 // CHECK-NEXT:  entry:
526 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
527 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
528 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP1]])
529 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
530 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
531 //
/* Codegen test: viwdupq_m_n_u16(inactive, a, b, 8, p), or the overloaded
 * viwdupq_m spelling when POLYMORPHIC is defined. */
uint16x8_t test_viwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u16(inactive, a, b, 8, p);
#endif /* POLYMORPHIC */
}
540 
541 // CHECK-LABEL: @test_viwdupq_m_n_u32(
542 // CHECK-NEXT:  entry:
543 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
544 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
545 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
546 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
547 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
548 //
/* Codegen test: viwdupq_m_n_u32(inactive, a, b, 4, p), or the overloaded
 * viwdupq_m spelling when POLYMORPHIC is defined. */
uint32x4_t test_viwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return viwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return viwdupq_m_n_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}
557 
558 // CHECK-LABEL: @test_vdwdupq_m_n_u8(
559 // CHECK-NEXT:  entry:
560 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
561 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
562 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP1]])
563 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
564 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
565 //
/* Codegen test: vdwdupq_m_n_u8(inactive, a, b, 1, p), or the overloaded
 * vdwdupq_m spelling when POLYMORPHIC is defined. */
uint8x16_t test_vdwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 1, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u8(inactive, a, b, 1, p);
#endif /* POLYMORPHIC */
}
574 
575 // CHECK-LABEL: @test_vdwdupq_m_n_u16(
576 // CHECK-NEXT:  entry:
577 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
578 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
579 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
580 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
581 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
582 //
/* Codegen test: vdwdupq_m_n_u16(inactive, a, b, 2, p), or the overloaded
 * vdwdupq_m spelling when POLYMORPHIC is defined. */
uint16x8_t test_vdwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 2, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u16(inactive, a, b, 2, p);
#endif /* POLYMORPHIC */
}
591 
592 // CHECK-LABEL: @test_vdwdupq_m_n_u32(
593 // CHECK-NEXT:  entry:
594 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
595 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
596 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
597 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
598 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
599 //
/* Codegen test: vdwdupq_m_n_u32(inactive, a, b, 4, p), or the overloaded
 * vdwdupq_m spelling when POLYMORPHIC is defined. */
uint32x4_t test_vdwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vdwdupq_m(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m_n_u32(inactive, a, b, 4, p);
#endif /* POLYMORPHIC */
}
608 
609 // CHECK-LABEL: @test_vidupq_m_wb_u8(
610 // CHECK-NEXT:  entry:
611 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
612 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
613 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
614 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <16 x i1> [[TMP2]])
615 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
616 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
617 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
618 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
619 //
/* Codegen test: vidupq_m_wb_u8(inactive, a, 8, p) with a pointer a, or the
 * overloaded vidupq_m spelling when POLYMORPHIC is defined. The expected IR
 * stores the intrinsic's second result back through a. */
uint8x16_t test_vidupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u8(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
628 
629 // CHECK-LABEL: @test_vidupq_m_wb_u16(
630 // CHECK-NEXT:  entry:
631 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
632 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
633 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
634 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 2, <8 x i1> [[TMP2]])
635 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
636 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
637 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
638 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
639 //
/* Codegen test: vidupq_m_wb_u16(inactive, a, 2, p) with a pointer a, or the
 * overloaded vidupq_m spelling when POLYMORPHIC is defined. The expected IR
 * stores the intrinsic's second result back through a. */
uint16x8_t test_vidupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 2, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u16(inactive, a, 2, p);
#endif /* POLYMORPHIC */
}
648 
649 // CHECK-LABEL: @test_vidupq_m_wb_u32(
650 // CHECK-NEXT:  entry:
651 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
652 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
653 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
654 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <4 x i1> [[TMP2]])
655 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
656 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
657 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
658 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
659 //
/* Codegen test: vidupq_m_wb_u32(inactive, a, 8, p) with a pointer a, or the
 * overloaded vidupq_m spelling when POLYMORPHIC is defined. The expected IR
 * stores the intrinsic's second result back through a. */
uint32x4_t test_vidupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vidupq_m(inactive, a, 8, p);
#else /* POLYMORPHIC */
    return vidupq_m_wb_u32(inactive, a, 8, p);
#endif /* POLYMORPHIC */
}
668 
669 // CHECK-LABEL: @test_vddupq_m_wb_u8(
670 // CHECK-NEXT:  entry:
671 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
672 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
673 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
674 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
675 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
676 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
677 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
678 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
679 //
/* Codegen test: vddupq_m_wb_u8(inactive, a, 1, p) with a pointer a, or the
 * overloaded vddupq_m spelling when POLYMORPHIC is defined. The expected IR
 * stores the intrinsic's second result back through a. */
uint8x16_t test_vddupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
{
#ifdef POLYMORPHIC
    return vddupq_m(inactive, a, 1, p);
#else /* POLYMORPHIC */
    return vddupq_m_wb_u8(inactive, a, 1, p);
#endif /* POLYMORPHIC */
}
688 
689 // CHECK-LABEL: @test_vddupq_m_wb_u16(
690 // CHECK-NEXT:  entry:
691 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
692 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
693 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
694 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <8 x i1> [[TMP2]])
695 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
696 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
697 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
698 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
699 //
uint16x8_t test_vddupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
{
    /* Predicated vddup, write-back form, immediate 1, 16-bit lanes.  The
     * scalar is read from *a and the second result element is written back;
     * inactive is forwarded as the first intrinsic operand. */
#ifndef POLYMORPHIC
    return vddupq_m_wb_u16(inactive, a, 1, p);
#else /* POLYMORPHIC */
    return vddupq_m(inactive, a, 1, p);
#endif /* !POLYMORPHIC */
}
708 
709 // CHECK-LABEL: @test_vddupq_m_wb_u32(
710 // CHECK-NEXT:  entry:
711 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
712 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
713 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
714 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
715 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
716 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
717 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
718 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
719 //
uint32x4_t test_vddupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
{
    /* Predicated vddup, write-back form, immediate 4, 32-bit lanes.  The
     * scalar is read from *a and the second result element is written back;
     * inactive is forwarded as the first intrinsic operand. */
#ifndef POLYMORPHIC
    return vddupq_m_wb_u32(inactive, a, 4, p);
#else /* POLYMORPHIC */
    return vddupq_m(inactive, a, 4, p);
#endif /* !POLYMORPHIC */
}
728 
729 // CHECK-LABEL: @test_viwdupq_m_wb_u8(
730 // CHECK-NEXT:  entry:
731 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
732 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
733 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
734 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP2]])
735 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
736 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
737 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
738 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
739 //
uint8x16_t test_viwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated viwdup, write-back form, immediate 8, 8-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return viwdupq_m_wb_u8(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m(inactive, a, b, 8, p);
#endif /* !POLYMORPHIC */
}
748 
749 // CHECK-LABEL: @test_viwdupq_m_wb_u16(
750 // CHECK-NEXT:  entry:
751 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
752 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
753 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
754 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP2]])
755 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
756 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
757 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
758 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
759 //
uint16x8_t test_viwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated viwdup, write-back form, immediate 8, 16-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return viwdupq_m_wb_u16(inactive, a, b, 8, p);
#else /* POLYMORPHIC */
    return viwdupq_m(inactive, a, b, 8, p);
#endif /* !POLYMORPHIC */
}
768 
769 // CHECK-LABEL: @test_viwdupq_m_wb_u32(
770 // CHECK-NEXT:  entry:
771 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
772 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
773 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
774 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
775 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
776 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
777 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
778 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
779 //
uint32x4_t test_viwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated viwdup, write-back form, immediate 4, 32-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return viwdupq_m_wb_u32(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return viwdupq_m(inactive, a, b, 4, p);
#endif /* !POLYMORPHIC */
}
788 
789 // CHECK-LABEL: @test_vdwdupq_m_wb_u8(
790 // CHECK-NEXT:  entry:
791 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
792 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
793 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
794 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
795 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
796 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
797 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
798 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
799 //
uint8x16_t test_vdwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated vdwdup, write-back form, immediate 1, 8-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return vdwdupq_m_wb_u8(inactive, a, b, 1, p);
#else /* POLYMORPHIC */
    return vdwdupq_m(inactive, a, b, 1, p);
#endif /* !POLYMORPHIC */
}
808 
809 // CHECK-LABEL: @test_vdwdupq_m_wb_u16(
810 // CHECK-NEXT:  entry:
811 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
812 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
813 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
814 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
815 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
816 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
817 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
818 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
819 //
uint16x8_t test_vdwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated vdwdup, write-back form, immediate 4, 16-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return vdwdupq_m_wb_u16(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m(inactive, a, b, 4, p);
#endif /* !POLYMORPHIC */
}
828 
829 // CHECK-LABEL: @test_vdwdupq_m_wb_u32(
830 // CHECK-NEXT:  entry:
831 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
832 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
833 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
834 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
835 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
836 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
837 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
838 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
839 //
uint32x4_t test_vdwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
{
    /* Predicated vdwdup, write-back form, immediate 4, 32-bit lanes.  *a is
     * loaded for the first scalar operand and receives the second result
     * element; b is forwarded unchanged as the second scalar operand. */
#ifndef POLYMORPHIC
    return vdwdupq_m_wb_u32(inactive, a, b, 4, p);
#else /* POLYMORPHIC */
    return vdwdupq_m(inactive, a, b, 4, p);
#endif /* !POLYMORPHIC */
}
848 
849 // CHECK-LABEL: @test_vidupq_x_n_u8(
850 // CHECK-NEXT:  entry:
851 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
852 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
853 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 2, <16 x i1> [[TMP1]])
854 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
855 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
856 //
test_vidupq_x_n_u8(uint32_t a,mve_pred16_t p)857 uint8x16_t test_vidupq_x_n_u8(uint32_t a, mve_pred16_t p)
858 {
859 #ifdef POLYMORPHIC
860     return vidupq_x_u8(a, 2, p);
861 #else /* POLYMORPHIC */
862     return vidupq_x_n_u8(a, 2, p);
863 #endif /* POLYMORPHIC */
864 }
865 
866 // CHECK-LABEL: @test_vidupq_x_n_u16(
867 // CHECK-NEXT:  entry:
868 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
869 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
870 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
871 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
872 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
873 //
test_vidupq_x_n_u16(uint32_t a,mve_pred16_t p)874 uint16x8_t test_vidupq_x_n_u16(uint32_t a, mve_pred16_t p)
875 {
876 #ifdef POLYMORPHIC
877     return vidupq_x_u16(a, 2, p);
878 #else /* POLYMORPHIC */
879     return vidupq_x_n_u16(a, 2, p);
880 #endif /* POLYMORPHIC */
881 }
882 
883 // CHECK-LABEL: @test_vidupq_x_n_u32(
884 // CHECK-NEXT:  entry:
885 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
886 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
887 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
888 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
889 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
890 //
test_vidupq_x_n_u32(uint32_t a,mve_pred16_t p)891 uint32x4_t test_vidupq_x_n_u32(uint32_t a, mve_pred16_t p)
892 {
893 #ifdef POLYMORPHIC
894     return vidupq_x_u32(a, 8, p);
895 #else /* POLYMORPHIC */
896     return vidupq_x_n_u32(a, 8, p);
897 #endif /* POLYMORPHIC */
898 }
899 
900 // CHECK-LABEL: @test_vddupq_x_n_u8(
901 // CHECK-NEXT:  entry:
902 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
903 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
904 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
905 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
906 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
907 //
test_vddupq_x_n_u8(uint32_t a,mve_pred16_t p)908 uint8x16_t test_vddupq_x_n_u8(uint32_t a, mve_pred16_t p)
909 {
910 #ifdef POLYMORPHIC
911     return vddupq_x_u8(a, 8, p);
912 #else /* POLYMORPHIC */
913     return vddupq_x_n_u8(a, 8, p);
914 #endif /* POLYMORPHIC */
915 }
916 
917 // CHECK-LABEL: @test_vddupq_x_n_u16(
918 // CHECK-NEXT:  entry:
919 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
920 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
921 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 4, <8 x i1> [[TMP1]])
922 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
923 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
924 //
test_vddupq_x_n_u16(uint32_t a,mve_pred16_t p)925 uint16x8_t test_vddupq_x_n_u16(uint32_t a, mve_pred16_t p)
926 {
927 #ifdef POLYMORPHIC
928     return vddupq_x_u16(a, 4, p);
929 #else /* POLYMORPHIC */
930     return vddupq_x_n_u16(a, 4, p);
931 #endif /* POLYMORPHIC */
932 }
933 
934 // CHECK-LABEL: @test_vddupq_x_n_u32(
935 // CHECK-NEXT:  entry:
936 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
937 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
938 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
939 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
940 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
941 //
test_vddupq_x_n_u32(uint32_t a,mve_pred16_t p)942 uint32x4_t test_vddupq_x_n_u32(uint32_t a, mve_pred16_t p)
943 {
944 #ifdef POLYMORPHIC
945     return vddupq_x_u32(a, 2, p);
946 #else /* POLYMORPHIC */
947     return vddupq_x_n_u32(a, 2, p);
948 #endif /* POLYMORPHIC */
949 }
950 
951 // CHECK-LABEL: @test_viwdupq_x_n_u8(
952 // CHECK-NEXT:  entry:
953 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
954 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
955 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
956 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
957 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
958 //
test_viwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)959 uint8x16_t test_viwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
960 {
961 #ifdef POLYMORPHIC
962     return viwdupq_x_u8(a, b, 2, p);
963 #else /* POLYMORPHIC */
964     return viwdupq_x_n_u8(a, b, 2, p);
965 #endif /* POLYMORPHIC */
966 }
967 
968 // CHECK-LABEL: @test_viwdupq_x_n_u16(
969 // CHECK-NEXT:  entry:
970 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
971 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
972 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP1]])
973 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
974 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
975 //
test_viwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)976 uint16x8_t test_viwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
977 {
978 #ifdef POLYMORPHIC
979     return viwdupq_x_u16(a, b, 4, p);
980 #else /* POLYMORPHIC */
981     return viwdupq_x_n_u16(a, b, 4, p);
982 #endif /* POLYMORPHIC */
983 }
984 
985 // CHECK-LABEL: @test_viwdupq_x_n_u32(
986 // CHECK-NEXT:  entry:
987 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
988 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
989 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <4 x i1> [[TMP1]])
990 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
991 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
992 //
test_viwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)993 uint32x4_t test_viwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
994 {
995 #ifdef POLYMORPHIC
996     return viwdupq_x_u32(a, b, 2, p);
997 #else /* POLYMORPHIC */
998     return viwdupq_x_n_u32(a, b, 2, p);
999 #endif /* POLYMORPHIC */
1000 }
1001 
1002 // CHECK-LABEL: @test_vdwdupq_x_n_u8(
1003 // CHECK-NEXT:  entry:
1004 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1005 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
1006 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
1007 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
1008 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
1009 //
test_vdwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)1010 uint8x16_t test_vdwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
1011 {
1012 #ifdef POLYMORPHIC
1013     return vdwdupq_x_u8(a, b, 2, p);
1014 #else /* POLYMORPHIC */
1015     return vdwdupq_x_n_u8(a, b, 2, p);
1016 #endif /* POLYMORPHIC */
1017 }
1018 
1019 // CHECK-LABEL: @test_vdwdupq_x_n_u16(
1020 // CHECK-NEXT:  entry:
1021 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1022 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1023 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
1024 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
1025 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
1026 //
test_vdwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)1027 uint16x8_t test_vdwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
1028 {
1029 #ifdef POLYMORPHIC
1030     return vdwdupq_x_u16(a, b, 2, p);
1031 #else /* POLYMORPHIC */
1032     return vdwdupq_x_n_u16(a, b, 2, p);
1033 #endif /* POLYMORPHIC */
1034 }
1035 
1036 // CHECK-LABEL: @test_vdwdupq_x_n_u32(
1037 // CHECK-NEXT:  entry:
1038 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1039 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1040 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <4 x i1> [[TMP1]])
1041 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
1042 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1043 //
test_vdwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)1044 uint32x4_t test_vdwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
1045 {
1046 #ifdef POLYMORPHIC
1047     return vdwdupq_x_u32(a, b, 8, p);
1048 #else /* POLYMORPHIC */
1049     return vdwdupq_x_n_u32(a, b, 8, p);
1050 #endif /* POLYMORPHIC */
1051 }
1052 
1053 // CHECK-LABEL: @test_vidupq_x_wb_u8(
1054 // CHECK-NEXT:  entry:
1055 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1056 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1057 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1058 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 2, <16 x i1> [[TMP2]])
1059 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1060 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1061 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1062 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1063 //
test_vidupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1064 uint8x16_t test_vidupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1065 {
1066 #ifdef POLYMORPHIC
1067     return vidupq_x_u8(a, 2, p);
1068 #else /* POLYMORPHIC */
1069     return vidupq_x_wb_u8(a, 2, p);
1070 #endif /* POLYMORPHIC */
1071 }
1072 
1073 // CHECK-LABEL: @test_vidupq_x_wb_u16(
1074 // CHECK-NEXT:  entry:
1075 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1076 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1077 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1078 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1079 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1080 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1081 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1082 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1083 //
test_vidupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1084 uint16x8_t test_vidupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1085 {
1086 #ifdef POLYMORPHIC
1087     return vidupq_x_u16(a, 4, p);
1088 #else /* POLYMORPHIC */
1089     return vidupq_x_wb_u16(a, 4, p);
1090 #endif /* POLYMORPHIC */
1091 }
1092 
1093 // CHECK-LABEL: @test_vidupq_x_wb_u32(
1094 // CHECK-NEXT:  entry:
1095 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1096 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1097 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1098 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 2, <4 x i1> [[TMP2]])
1099 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1100 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1101 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1102 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1103 //
test_vidupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1104 uint32x4_t test_vidupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1105 {
1106 #ifdef POLYMORPHIC
1107     return vidupq_x_u32(a, 2, p);
1108 #else /* POLYMORPHIC */
1109     return vidupq_x_wb_u32(a, 2, p);
1110 #endif /* POLYMORPHIC */
1111 }
1112 
1113 // CHECK-LABEL: @test_vddupq_x_wb_u8(
1114 // CHECK-NEXT:  entry:
1115 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1116 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1117 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1118 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
1119 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1120 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1121 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1122 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1123 //
test_vddupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1124 uint8x16_t test_vddupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1125 {
1126 #ifdef POLYMORPHIC
1127     return vddupq_x_u8(a, 1, p);
1128 #else /* POLYMORPHIC */
1129     return vddupq_x_wb_u8(a, 1, p);
1130 #endif /* POLYMORPHIC */
1131 }
1132 
1133 // CHECK-LABEL: @test_vddupq_x_wb_u16(
1134 // CHECK-NEXT:  entry:
1135 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1136 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1137 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1138 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1139 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1140 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1141 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1142 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1143 //
test_vddupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1144 uint16x8_t test_vddupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1145 {
1146 #ifdef POLYMORPHIC
1147     return vddupq_x_u16(a, 4, p);
1148 #else /* POLYMORPHIC */
1149     return vddupq_x_wb_u16(a, 4, p);
1150 #endif /* POLYMORPHIC */
1151 }
1152 
1153 // CHECK-LABEL: @test_vddupq_x_wb_u32(
1154 // CHECK-NEXT:  entry:
1155 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1156 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1157 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1158 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1159 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1160 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1161 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1162 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1163 //
test_vddupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1164 uint32x4_t test_vddupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1165 {
1166 #ifdef POLYMORPHIC
1167     return vddupq_x_u32(a, 4, p);
1168 #else /* POLYMORPHIC */
1169     return vddupq_x_wb_u32(a, 4, p);
1170 #endif /* POLYMORPHIC */
1171 }
1172 
1173 // CHECK-LABEL: @test_viwdupq_x_wb_u8(
1174 // CHECK-NEXT:  entry:
1175 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1176 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1177 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1178 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
1179 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1180 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1181 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1182 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1183 //
test_viwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1184 uint8x16_t test_viwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1185 {
1186 #ifdef POLYMORPHIC
1187     return viwdupq_x_u8(a, b, 1, p);
1188 #else /* POLYMORPHIC */
1189     return viwdupq_x_wb_u8(a, b, 1, p);
1190 #endif /* POLYMORPHIC */
1191 }
1192 
1193 // CHECK-LABEL: @test_viwdupq_x_wb_u16(
1194 // CHECK-NEXT:  entry:
1195 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1196 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1197 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1198 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP2]])
1199 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1200 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1201 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1202 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1203 //
test_viwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1204 uint16x8_t test_viwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1205 {
1206 #ifdef POLYMORPHIC
1207     return viwdupq_x_u16(a, b, 2, p);
1208 #else /* POLYMORPHIC */
1209     return viwdupq_x_wb_u16(a, b, 2, p);
1210 #endif /* POLYMORPHIC */
1211 }
1212 
1213 // CHECK-LABEL: @test_viwdupq_x_wb_u32(
1214 // CHECK-NEXT:  entry:
1215 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1216 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1217 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1218 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <4 x i1> [[TMP2]])
1219 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1220 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1221 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1222 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1223 //
test_viwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1224 uint32x4_t test_viwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1225 {
1226 #ifdef POLYMORPHIC
1227     return viwdupq_x_u32(a, b, 1, p);
1228 #else /* POLYMORPHIC */
1229     return viwdupq_x_wb_u32(a, b, 1, p);
1230 #endif /* POLYMORPHIC */
1231 }
1232 
1233 // CHECK-LABEL: @test_vdwdupq_x_wb_u8(
1234 // CHECK-NEXT:  entry:
1235 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1236 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1237 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1238 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <16 x i1> [[TMP2]])
1239 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1240 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1241 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1242 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1243 //
test_vdwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1244 uint8x16_t test_vdwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1245 {
1246 #ifdef POLYMORPHIC
1247     return vdwdupq_x_u8(a, b, 4, p);
1248 #else /* POLYMORPHIC */
1249     return vdwdupq_x_wb_u8(a, b, 4, p);
1250 #endif /* POLYMORPHIC */
1251 }
1252 
1253 // CHECK-LABEL: @test_vdwdupq_x_wb_u16(
1254 // CHECK-NEXT:  entry:
1255 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1256 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1257 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1258 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
1259 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1260 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1261 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1262 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1263 //
test_vdwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1264 uint16x8_t test_vdwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1265 {
1266 #ifdef POLYMORPHIC
1267     return vdwdupq_x_u16(a, b, 4, p);
1268 #else /* POLYMORPHIC */
1269     return vdwdupq_x_wb_u16(a, b, 4, p);
1270 #endif /* POLYMORPHIC */
1271 }
1272 
1273 // CHECK-LABEL: @test_vdwdupq_x_wb_u32(
1274 // CHECK-NEXT:  entry:
1275 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
1276 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1277 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1278 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
1279 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1280 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
1281 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1282 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1283 //
test_vdwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1284 uint32x4_t test_vdwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1285 {
1286 #ifdef POLYMORPHIC
1287     return vdwdupq_x_u32(a, b, 4, p);
1288 #else /* POLYMORPHIC */
1289     return vdwdupq_x_wb_u32(a, b, 4, p);
1290 #endif /* POLYMORPHIC */
1291 }
1292