xref: /llvm-project/clang/test/CodeGen/AArch64/neon-perm.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
3 
4 // REQUIRES: aarch64-registered-target || arm-registered-target
5 
6 #include <arm_neon.h>
7 
8 // CHECK-LABEL: @test_vuzp1_s8(
9 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
10 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
11 int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
12   return vuzp1_s8(a, b);
13 }
14 
15 // CHECK-LABEL: @test_vuzp1q_s8(
16 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
17 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
18 int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
19   return vuzp1q_s8(a, b);
20 }
21 
22 // CHECK-LABEL: @test_vuzp1_s16(
23 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
24 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
25 int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
26   return vuzp1_s16(a, b);
27 }
28 
29 // CHECK-LABEL: @test_vuzp1q_s16(
30 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
31 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
32 int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
33   return vuzp1q_s16(a, b);
34 }
35 
36 // CHECK-LABEL: @test_vuzp1_s32(
37 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
38 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
39 int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
40   return vuzp1_s32(a, b);
41 }
42 
43 // CHECK-LABEL: @test_vuzp1q_s32(
44 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
45 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
46 int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
47   return vuzp1q_s32(a, b);
48 }
49 
50 // CHECK-LABEL: @test_vuzp1q_s64(
51 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
52 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
53 int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
54   return vuzp1q_s64(a, b);
55 }
56 
57 // CHECK-LABEL: @test_vuzp1_u8(
58 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
59 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
60 uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
61   return vuzp1_u8(a, b);
62 }
63 
64 // CHECK-LABEL: @test_vuzp1q_u8(
65 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
66 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
67 uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
68   return vuzp1q_u8(a, b);
69 }
70 
71 // CHECK-LABEL: @test_vuzp1_u16(
72 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
73 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
74 uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
75   return vuzp1_u16(a, b);
76 }
77 
78 // CHECK-LABEL: @test_vuzp1q_u16(
79 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
80 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
81 uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
82   return vuzp1q_u16(a, b);
83 }
84 
85 // CHECK-LABEL: @test_vuzp1_u32(
86 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
87 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
88 uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
89   return vuzp1_u32(a, b);
90 }
91 
92 // CHECK-LABEL: @test_vuzp1q_u32(
93 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
94 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
95 uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
96   return vuzp1q_u32(a, b);
97 }
98 
99 // CHECK-LABEL: @test_vuzp1q_u64(
100 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
101 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
102 uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
103   return vuzp1q_u64(a, b);
104 }
105 
106 // CHECK-LABEL: @test_vuzp1_f32(
107 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
108 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
109 float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
110   return vuzp1_f32(a, b);
111 }
112 
113 // CHECK-LABEL: @test_vuzp1q_f32(
114 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
115 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
116 float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
117   return vuzp1q_f32(a, b);
118 }
119 
120 // CHECK-LABEL: @test_vuzp1q_f64(
121 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
122 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
123 float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
124   return vuzp1q_f64(a, b);
125 }
126 
127 // CHECK-LABEL: @test_vuzp1_p8(
128 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
129 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
130 poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
131   return vuzp1_p8(a, b);
132 }
133 
134 // CHECK-LABEL: @test_vuzp1q_p8(
135 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
136 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
137 poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
138   return vuzp1q_p8(a, b);
139 }
140 
141 // CHECK-LABEL: @test_vuzp1_p16(
142 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
143 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
144 poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
145   return vuzp1_p16(a, b);
146 }
147 
148 // CHECK-LABEL: @test_vuzp1q_p16(
149 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
150 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
151 poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
152   return vuzp1q_p16(a, b);
153 }
154 
155 // CHECK-LABEL: @test_vuzp2_s8(
156 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
157 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
158 int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
159   return vuzp2_s8(a, b);
160 }
161 
162 // CHECK-LABEL: @test_vuzp2q_s8(
163 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
164 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
165 int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
166   return vuzp2q_s8(a, b);
167 }
168 
169 // CHECK-LABEL: @test_vuzp2_s16(
170 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
171 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
172 int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
173   return vuzp2_s16(a, b);
174 }
175 
176 // CHECK-LABEL: @test_vuzp2q_s16(
177 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
178 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
179 int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
180   return vuzp2q_s16(a, b);
181 }
182 
183 // CHECK-LABEL: @test_vuzp2_s32(
184 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
185 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
186 int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
187   return vuzp2_s32(a, b);
188 }
189 
190 // CHECK-LABEL: @test_vuzp2q_s32(
191 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
192 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
193 int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
194   return vuzp2q_s32(a, b);
195 }
196 
197 // CHECK-LABEL: @test_vuzp2q_s64(
198 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
199 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
200 int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
201   return vuzp2q_s64(a, b);
202 }
203 
204 // CHECK-LABEL: @test_vuzp2_u8(
205 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
206 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
207 uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
208   return vuzp2_u8(a, b);
209 }
210 
211 // CHECK-LABEL: @test_vuzp2q_u8(
212 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
213 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
214 uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
215   return vuzp2q_u8(a, b);
216 }
217 
218 // CHECK-LABEL: @test_vuzp2_u16(
219 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
220 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
221 uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
222   return vuzp2_u16(a, b);
223 }
224 
225 // CHECK-LABEL: @test_vuzp2q_u16(
226 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
227 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
228 uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
229   return vuzp2q_u16(a, b);
230 }
231 
232 // CHECK-LABEL: @test_vuzp2_u32(
233 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
234 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
235 uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
236   return vuzp2_u32(a, b);
237 }
238 
239 // CHECK-LABEL: @test_vuzp2q_u32(
240 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
241 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
242 uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
243   return vuzp2q_u32(a, b);
244 }
245 
246 // CHECK-LABEL: @test_vuzp2q_u64(
247 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
248 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
249 uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
250   return vuzp2q_u64(a, b);
251 }
252 
253 // CHECK-LABEL: @test_vuzp2_f32(
254 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
255 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
256 float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
257   return vuzp2_f32(a, b);
258 }
259 
260 // CHECK-LABEL: @test_vuzp2q_f32(
261 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
262 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
263 float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
264   return vuzp2q_f32(a, b);
265 }
266 
267 // CHECK-LABEL: @test_vuzp2q_f64(
268 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
269 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
270 float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
271   return vuzp2q_f64(a, b);
272 }
273 
274 // CHECK-LABEL: @test_vuzp2_p8(
275 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
276 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
277 poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
278   return vuzp2_p8(a, b);
279 }
280 
281 // CHECK-LABEL: @test_vuzp2q_p8(
282 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
283 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
284 poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
285   return vuzp2q_p8(a, b);
286 }
287 
288 // CHECK-LABEL: @test_vuzp2_p16(
289 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
290 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
291 poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
292   return vuzp2_p16(a, b);
293 }
294 
295 // CHECK-LABEL: @test_vuzp2q_p16(
296 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
297 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
298 poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
299   return vuzp2q_p16(a, b);
300 }
301 
302 // CHECK-LABEL: @test_vzip1_s8(
303 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
304 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
305 int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
306   return vzip1_s8(a, b);
307 }
308 
309 // CHECK-LABEL: @test_vzip1q_s8(
310 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
311 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
312 int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
313   return vzip1q_s8(a, b);
314 }
315 
316 // CHECK-LABEL: @test_vzip1_s16(
317 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
318 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
319 int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
320   return vzip1_s16(a, b);
321 }
322 
323 // CHECK-LABEL: @test_vzip1q_s16(
324 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
325 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
326 int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
327   return vzip1q_s16(a, b);
328 }
329 
330 // CHECK-LABEL: @test_vzip1_s32(
331 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
332 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
333 int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
334   return vzip1_s32(a, b);
335 }
336 
337 // CHECK-LABEL: @test_vzip1q_s32(
338 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
339 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
340 int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
341   return vzip1q_s32(a, b);
342 }
343 
344 // CHECK-LABEL: @test_vzip1q_s64(
345 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
346 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
347 int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
348   return vzip1q_s64(a, b);
349 }
350 
351 // CHECK-LABEL: @test_vzip1_u8(
352 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
353 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
354 uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
355   return vzip1_u8(a, b);
356 }
357 
358 // CHECK-LABEL: @test_vzip1q_u8(
359 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
360 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
361 uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
362   return vzip1q_u8(a, b);
363 }
364 
365 // CHECK-LABEL: @test_vzip1_u16(
366 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
367 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
368 uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
369   return vzip1_u16(a, b);
370 }
371 
372 // CHECK-LABEL: @test_vzip1q_u16(
373 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
374 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
375 uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
376   return vzip1q_u16(a, b);
377 }
378 
379 // CHECK-LABEL: @test_vzip1_u32(
380 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
381 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
382 uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
383   return vzip1_u32(a, b);
384 }
385 
386 // CHECK-LABEL: @test_vzip1q_u32(
387 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
388 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
389 uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
390   return vzip1q_u32(a, b);
391 }
392 
393 // CHECK-LABEL: @test_vzip1q_u64(
394 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
395 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
396 uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
397   return vzip1q_u64(a, b);
398 }
399 
400 // CHECK-LABEL: @test_vzip1_f32(
401 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
402 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
403 float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
404   return vzip1_f32(a, b);
405 }
406 
407 // CHECK-LABEL: @test_vzip1q_f32(
408 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
409 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
410 float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
411   return vzip1q_f32(a, b);
412 }
413 
414 // CHECK-LABEL: @test_vzip1q_f64(
415 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
416 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
417 float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
418   return vzip1q_f64(a, b);
419 }
420 
421 // CHECK-LABEL: @test_vzip1_p8(
422 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
423 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
424 poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
425   return vzip1_p8(a, b);
426 }
427 
428 // CHECK-LABEL: @test_vzip1q_p8(
429 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
430 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
431 poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
432   return vzip1q_p8(a, b);
433 }
434 
435 // CHECK-LABEL: @test_vzip1_p16(
436 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
437 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
438 poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
439   return vzip1_p16(a, b);
440 }
441 
442 // CHECK-LABEL: @test_vzip1q_p16(
443 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
444 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
445 poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
446   return vzip1q_p16(a, b);
447 }
448 
449 // CHECK-LABEL: @test_vzip2_s8(
450 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
451 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
452 int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
453   return vzip2_s8(a, b);
454 }
455 
456 // CHECK-LABEL: @test_vzip2q_s8(
457 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
458 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
459 int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
460   return vzip2q_s8(a, b);
461 }
462 
463 // CHECK-LABEL: @test_vzip2_s16(
464 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
465 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
466 int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
467   return vzip2_s16(a, b);
468 }
469 
470 // CHECK-LABEL: @test_vzip2q_s16(
471 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
472 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
473 int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
474   return vzip2q_s16(a, b);
475 }
476 
477 // CHECK-LABEL: @test_vzip2_s32(
478 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
479 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
480 int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
481   return vzip2_s32(a, b);
482 }
483 
484 // CHECK-LABEL: @test_vzip2q_s32(
485 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
486 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
487 int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
488   return vzip2q_s32(a, b);
489 }
490 
491 // CHECK-LABEL: @test_vzip2q_s64(
492 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
493 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
494 int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
495   return vzip2q_s64(a, b);
496 }
497 
498 // CHECK-LABEL: @test_vzip2_u8(
499 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
500 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
501 uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
502   return vzip2_u8(a, b);
503 }
504 
505 // CHECK-LABEL: @test_vzip2q_u8(
506 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
507 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
508 uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
509   return vzip2q_u8(a, b);
510 }
511 
512 // CHECK-LABEL: @test_vzip2_u16(
513 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
514 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
515 uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
516   return vzip2_u16(a, b);
517 }
518 
519 // CHECK-LABEL: @test_vzip2q_u16(
520 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
521 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
522 uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
523   return vzip2q_u16(a, b);
524 }
525 
526 // CHECK-LABEL: @test_vzip2_u32(
527 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
528 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
529 uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
530   return vzip2_u32(a, b);
531 }
532 
533 // CHECK-LABEL: @test_vzip2q_u32(
534 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
535 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
536 uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
537   return vzip2q_u32(a, b);
538 }
539 
540 // CHECK-LABEL: @test_vzip2q_u64(
541 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
542 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
543 uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
544   return vzip2q_u64(a, b);
545 }
546 
547 // CHECK-LABEL: @test_vzip2_f32(
548 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
549 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
550 float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
551   return vzip2_f32(a, b);
552 }
553 
554 // CHECK-LABEL: @test_vzip2q_f32(
555 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
556 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
557 float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
558   return vzip2q_f32(a, b);
559 }
560 
561 // CHECK-LABEL: @test_vzip2q_f64(
562 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
563 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
564 float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
565   return vzip2q_f64(a, b);
566 }
567 
568 // CHECK-LABEL: @test_vzip2_p8(
569 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
570 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
571 poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
572   return vzip2_p8(a, b);
573 }
574 
575 // CHECK-LABEL: @test_vzip2q_p8(
576 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
577 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
578 poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
579   return vzip2q_p8(a, b);
580 }
581 
582 // CHECK-LABEL: @test_vzip2_p16(
583 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
584 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
585 poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
586   return vzip2_p16(a, b);
587 }
588 
589 // CHECK-LABEL: @test_vzip2q_p16(
590 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
591 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
592 poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
593   return vzip2q_p16(a, b);
594 }
595 
596 // CHECK-LABEL: @test_vtrn1_s8(
597 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
598 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
599 int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
600   return vtrn1_s8(a, b);
601 }
602 
603 // CHECK-LABEL: @test_vtrn1q_s8(
604 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
605 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
606 int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
607   return vtrn1q_s8(a, b);
608 }
609 
610 // CHECK-LABEL: @test_vtrn1_s16(
611 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
612 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
613 int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
614   return vtrn1_s16(a, b);
615 }
616 
617 // CHECK-LABEL: @test_vtrn1q_s16(
618 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
619 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
620 int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
621   return vtrn1q_s16(a, b);
622 }
623 
624 // CHECK-LABEL: @test_vtrn1_s32(
625 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
626 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
627 int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
628   return vtrn1_s32(a, b);
629 }
630 
631 // CHECK-LABEL: @test_vtrn1q_s32(
632 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
633 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
634 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
635   return vtrn1q_s32(a, b);
636 }
637 
638 // CHECK-LABEL: @test_vtrn1q_s64(
639 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
640 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
641 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
642   return vtrn1q_s64(a, b);
643 }
644 
645 // CHECK-LABEL: @test_vtrn1_u8(
646 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
647 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
648 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
649   return vtrn1_u8(a, b);
650 }
651 
652 // CHECK-LABEL: @test_vtrn1q_u8(
653 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
654 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
655 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
656   return vtrn1q_u8(a, b);
657 }
658 
659 // CHECK-LABEL: @test_vtrn1_u16(
660 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
661 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
662 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
663   return vtrn1_u16(a, b);
664 }
665 
666 // CHECK-LABEL: @test_vtrn1q_u16(
667 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
668 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
669 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
670   return vtrn1q_u16(a, b);
671 }
672 
673 // CHECK-LABEL: @test_vtrn1_u32(
674 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
675 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
676 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
677   return vtrn1_u32(a, b);
678 }
679 
680 // CHECK-LABEL: @test_vtrn1q_u32(
681 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
682 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
683 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
684   return vtrn1q_u32(a, b);
685 }
686 
687 // CHECK-LABEL: @test_vtrn1q_u64(
688 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
689 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
690 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
691   return vtrn1q_u64(a, b);
692 }
693 
694 // CHECK-LABEL: @test_vtrn1_f32(
695 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
696 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
697 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
698   return vtrn1_f32(a, b);
699 }
700 
701 // CHECK-LABEL: @test_vtrn1q_f32(
702 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
703 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Exercises vtrn1q_f32 on two float32x4_t inputs. The directives above expect
// a single <4 x float> shufflevector with mask <0, 4, 2, 6> (a0, b0, a2, b2).
704 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
705   return vtrn1q_f32(a, b);
706 }
707 
708 // CHECK-LABEL: @test_vtrn1q_f64(
709 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
710 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Exercises vtrn1q_f64 on two float64x2_t inputs. The expected <2 x double>
// shufflevector uses mask <0, 2>: lane 0 of a followed by lane 0 of b.
711 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
712   return vtrn1q_f64(a, b);
713 }
714 
715 // CHECK-LABEL: @test_vtrn1_p8(
716 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
717 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Exercises vtrn1_p8 on two poly8x8_t inputs, which appear as <8 x i8> in the
// expected IR. The expected shufflevector mask is
// <0, 8, 2, 10, 4, 12, 6, 14>: even-numbered lanes of a and b, interleaved.
718 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
719   return vtrn1_p8(a, b);
720 }
721 
722 // CHECK-LABEL: @test_vtrn1q_p8(
723 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
724 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Exercises vtrn1q_p8 on two poly8x16_t inputs, which appear as <16 x i8> in
// the expected IR. The expected shufflevector mask is
// <0, 16, 2, 18, ..., 14, 30>: even-numbered lanes of a and b, interleaved.
725 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
726   return vtrn1q_p8(a, b);
727 }
728 
729 // CHECK-LABEL: @test_vtrn1_p16(
730 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
731 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Exercises vtrn1_p16 on two poly16x4_t inputs, which appear as <4 x i16> in
// the expected IR. The expected shufflevector mask is <0, 4, 2, 6>
// (a0, b0, a2, b2).
732 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
733   return vtrn1_p16(a, b);
734 }
735 
736 // CHECK-LABEL: @test_vtrn1q_p16(
737 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
738 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Exercises vtrn1q_p16 on two poly16x8_t inputs, which appear as <8 x i16> in
// the expected IR. The expected shufflevector mask is
// <0, 8, 2, 10, 4, 12, 6, 14>: even-numbered lanes of a and b, interleaved.
739 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
740   return vtrn1q_p16(a, b);
741 }
742 
743 // CHECK-LABEL: @test_vtrn2_s8(
744 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
745 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Exercises vtrn2_s8 on two int8x8_t inputs. The directives above expect a
// single <8 x i8> shufflevector with mask <1, 9, 3, 11, 5, 13, 7, 15>: the
// odd-numbered lanes of a and b, interleaved (a1, b1, a3, b3, ...).
746 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
747   return vtrn2_s8(a, b);
748 }
749 
750 // CHECK-LABEL: @test_vtrn2q_s8(
751 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
752 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Exercises vtrn2q_s8 on two int8x16_t inputs. The directives above expect a
// single <16 x i8> shufflevector with mask <1, 17, 3, 19, ..., 15, 31>: the
// odd-numbered lanes of a and b, interleaved.
753 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
754   return vtrn2q_s8(a, b);
755 }
756 
757 // CHECK-LABEL: @test_vtrn2_s16(
758 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
759 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Exercises vtrn2_s16 on two int16x4_t inputs. The directives above expect a
// single <4 x i16> shufflevector with mask <1, 5, 3, 7> (a1, b1, a3, b3).
760 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
761   return vtrn2_s16(a, b);
762 }
763 
764 // CHECK-LABEL: @test_vtrn2q_s16(
765 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
766 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Exercises vtrn2q_s16 on two int16x8_t inputs. The directives above expect a
// single <8 x i16> shufflevector with mask <1, 9, 3, 11, 5, 13, 7, 15>: the
// odd-numbered lanes of a and b, interleaved.
767 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
768   return vtrn2q_s16(a, b);
769 }
770 
771 // CHECK-LABEL: @test_vtrn2_s32(
772 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
773 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Exercises vtrn2_s32 on two int32x2_t inputs. With only two lanes the
// expected shufflevector mask is <1, 3>: lane 1 of a followed by lane 1 of b.
774 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
775   return vtrn2_s32(a, b);
776 }
777 
778 // CHECK-LABEL: @test_vtrn2q_s32(
779 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
780 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Exercises vtrn2q_s32 on two int32x4_t inputs. The directives above expect a
// single <4 x i32> shufflevector with mask <1, 5, 3, 7> (a1, b1, a3, b3).
781 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
782   return vtrn2q_s32(a, b);
783 }
784 
785 // CHECK-LABEL: @test_vtrn2q_s64(
786 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
787 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn2q_s64 on two int64x2_t inputs. The expected <2 x i64>
// shufflevector uses mask <1, 3>: lane 1 of a followed by lane 1 of b.
788 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
789   return vtrn2q_s64(a, b);
790 }
791 
792 // CHECK-LABEL: @test_vtrn2_u8(
793 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
794 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Exercises vtrn2_u8 on two uint8x8_t inputs. The directives above expect a
// single <8 x i8> shufflevector with mask <1, 9, 3, 11, 5, 13, 7, 15>: the
// odd-numbered lanes of a and b, interleaved.
795 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
796   return vtrn2_u8(a, b);
797 }
798 
799 // CHECK-LABEL: @test_vtrn2q_u8(
800 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
801 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Exercises vtrn2q_u8 on two uint8x16_t inputs. The directives above expect a
// single <16 x i8> shufflevector with mask <1, 17, 3, 19, ..., 15, 31>: the
// odd-numbered lanes of a and b, interleaved.
802 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
803   return vtrn2q_u8(a, b);
804 }
805 
806 // CHECK-LABEL: @test_vtrn2_u16(
807 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
808 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Exercises vtrn2_u16 on two uint16x4_t inputs. The directives above expect a
// single <4 x i16> shufflevector with mask <1, 5, 3, 7> (a1, b1, a3, b3).
809 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
810   return vtrn2_u16(a, b);
811 }
812 
813 // CHECK-LABEL: @test_vtrn2q_u16(
814 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
815 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Exercises vtrn2q_u16 on two uint16x8_t inputs. The directives above expect a
// single <8 x i16> shufflevector with mask <1, 9, 3, 11, 5, 13, 7, 15>: the
// odd-numbered lanes of a and b, interleaved.
816 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
817   return vtrn2q_u16(a, b);
818 }
819 
820 // CHECK-LABEL: @test_vtrn2_u32(
821 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
822 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Exercises vtrn2_u32 on two uint32x2_t inputs. With only two lanes the
// expected shufflevector mask is <1, 3>: lane 1 of a followed by lane 1 of b.
823 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
824   return vtrn2_u32(a, b);
825 }
826 
827 // CHECK-LABEL: @test_vtrn2q_u32(
828 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
829 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Exercises vtrn2q_u32 on two uint32x4_t inputs. The directives above expect a
// single <4 x i32> shufflevector with mask <1, 5, 3, 7> (a1, b1, a3, b3).
830 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
831   return vtrn2q_u32(a, b);
832 }
833 
834 // CHECK-LABEL: @test_vtrn2q_u64(
835 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
836 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn2q_u64 on two uint64x2_t inputs. The expected <2 x i64>
// shufflevector uses mask <1, 3>: lane 1 of a followed by lane 1 of b.
837 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
838   return vtrn2q_u64(a, b);
839 }
840 
841 // CHECK-LABEL: @test_vtrn2_f32(
842 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
843 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Exercises vtrn2_f32 on two float32x2_t inputs. The expected <2 x float>
// shufflevector uses mask <1, 3>: lane 1 of a followed by lane 1 of b.
844 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
845   return vtrn2_f32(a, b);
846 }
847 
848 // CHECK-LABEL: @test_vtrn2q_f32(
849 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
850 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Exercises vtrn2q_f32 on two float32x4_t inputs. The directives above expect
// a single <4 x float> shufflevector with mask <1, 5, 3, 7> (a1, b1, a3, b3).
851 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
852   return vtrn2q_f32(a, b);
853 }
854 
855 // CHECK-LABEL: @test_vtrn2q_f64(
856 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
857 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Exercises vtrn2q_f64 on two float64x2_t inputs. The expected <2 x double>
// shufflevector uses mask <1, 3>: lane 1 of a followed by lane 1 of b.
858 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
859   return vtrn2q_f64(a, b);
860 }
861 
862 // CHECK-LABEL: @test_vtrn2_p8(
863 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
864 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Exercises vtrn2_p8 on two poly8x8_t inputs, which appear as <8 x i8> in the
// expected IR. The expected shufflevector mask is
// <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes of a and b, interleaved.
865 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
866   return vtrn2_p8(a, b);
867 }
868 
869 // CHECK-LABEL: @test_vtrn2q_p8(
870 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
871 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Exercises vtrn2q_p8 on two poly8x16_t inputs, which appear as <16 x i8> in
// the expected IR. The expected shufflevector mask is
// <1, 17, 3, 19, ..., 15, 31>: odd-numbered lanes of a and b, interleaved.
872 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
873   return vtrn2q_p8(a, b);
874 }
875 
876 // CHECK-LABEL: @test_vtrn2_p16(
877 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
878 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Exercises vtrn2_p16 on two poly16x4_t inputs, which appear as <4 x i16> in
// the expected IR. The expected shufflevector mask is <1, 5, 3, 7>
// (a1, b1, a3, b3).
879 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
880   return vtrn2_p16(a, b);
881 }
882 
883 // CHECK-LABEL: @test_vtrn2q_p16(
884 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
885 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Exercises vtrn2q_p16 on two poly16x8_t inputs, which appear as <8 x i16> in
// the expected IR. The expected shufflevector mask is
// <1, 9, 3, 11, 5, 13, 7, 15>: odd-numbered lanes of a and b, interleaved.
886 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
887   return vtrn2q_p16(a, b);
888 }
889 
890 // CHECK-LABEL: @test_vuzp_s8(
891 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
892 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
893 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
894 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
895 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
896 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
897 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
898 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
899 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
900 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
901 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
902 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
903 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Exercises vuzp_s8, which returns a two-vector int8x8x2_t. The directives
// above expect two <8 x i8> shufflevectors over a/b: the even indices
// <0, 2, ..., 14> stored at the start of a temporary result, and the odd
// indices <1, 3, ..., 15> stored one vector further in; the struct is then
// copied to the function's own return slot and returned by value.
904 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
905   return vuzp_s8(a, b);
906 }
907 
908 // CHECK-LABEL: @test_vuzp_s16(
909 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
910 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
911 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
912 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
913 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
914 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
915 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
916 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
917 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
918 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
919 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
920 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
921 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
922 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
923 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Exercises vuzp_s16, which returns a two-vector int16x4x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <4 x i16>
// shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of a temporary
// result and mask <1, 3, 5, 7> stored one vector further in, before the struct
// is returned by value.
924 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
925   return vuzp_s16(a, b);
926 }
927 
928 // CHECK-LABEL: @test_vuzp_s32(
929 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
930 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
931 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
932 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
933 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
934 // CHECK:   store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
935 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
936 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
937 // CHECK:   store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
938 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
939 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
940 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
941 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
942 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
943 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Exercises vuzp_s32, which returns a two-vector int32x2x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <2 x i32>
// shufflevectors over a/b: mask <0, 2> stored at the start of a temporary
// result and mask <1, 3> stored one vector further in, before the struct is
// returned by value.
944 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
945   return vuzp_s32(a, b);
946 }
947 
948 // CHECK-LABEL: @test_vuzp_u8(
949 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
950 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
951 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
952 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
953 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
954 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
955 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
956 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
957 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
958 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
959 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
960 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
961 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Exercises vuzp_u8, which returns a two-vector uint8x8x2_t. The directives
// above expect two <8 x i8> shufflevectors over a/b: the even indices
// <0, 2, ..., 14> stored at the start of a temporary result, and the odd
// indices <1, 3, ..., 15> stored one vector further in, before the struct is
// returned by value.
962 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
963   return vuzp_u8(a, b);
964 }
965 
966 // CHECK-LABEL: @test_vuzp_u16(
967 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
968 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
969 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
970 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
971 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
972 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
973 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
974 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
975 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
976 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
977 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
978 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
979 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
980 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
981 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Exercises vuzp_u16, which returns a two-vector uint16x4x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <4 x i16>
// shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of a temporary
// result and mask <1, 3, 5, 7> stored one vector further in, before the struct
// is returned by value.
982 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
983   return vuzp_u16(a, b);
984 }
985 
986 // CHECK-LABEL: @test_vuzp_u32(
987 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
988 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
989 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
990 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
991 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
992 // CHECK:   store <2 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
993 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
994 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
995 // CHECK:   store <2 x i32> [[VUZP1_I]], ptr [[TMP4]]
996 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
997 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
998 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
999 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1000 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1001 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Exercises vuzp_u32, which returns a two-vector uint32x2x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <2 x i32>
// shufflevectors over a/b: mask <0, 2> stored at the start of a temporary
// result and mask <1, 3> stored one vector further in, before the struct is
// returned by value.
1002 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
1003   return vuzp_u32(a, b);
1004 }
1005 
1006 // CHECK-LABEL: @test_vuzp_f32(
1007 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1008 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1009 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1010 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1011 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1012 // CHECK:   store <2 x float> [[VUZP_I]], ptr [[RETVAL_I]]
1013 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1014 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1015 // CHECK:   store <2 x float> [[VUZP1_I]], ptr [[TMP4]]
1016 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1017 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1018 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1019 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1020 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1021 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Exercises vuzp_f32, which returns a two-vector float32x2x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <2 x float>
// shufflevectors over a/b: mask <0, 2> stored at the start of a temporary
// result and mask <1, 3> stored one vector further in, before the struct is
// returned by value.
1022 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
1023   return vuzp_f32(a, b);
1024 }
1025 
1026 // CHECK-LABEL: @test_vuzp_p8(
1027 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1028 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1029 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1030 // CHECK:   store <8 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1031 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1032 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1033 // CHECK:   store <8 x i8> [[VUZP1_I]], ptr [[TMP2]]
1034 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1035 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1036 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1037 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1038 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1039 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Exercises vuzp_p8, which returns a two-vector poly8x8x2_t. The directives
// above expect two <8 x i8> shufflevectors over a/b: the even indices
// <0, 2, ..., 14> stored at the start of a temporary result, and the odd
// indices <1, 3, ..., 15> stored one vector further in, before the struct is
// returned by value.
1040 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
1041   return vuzp_p8(a, b);
1042 }
1043 
1044 // CHECK-LABEL: @test_vuzp_p16(
1045 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1046 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1047 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1048 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1049 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1050 // CHECK:   store <4 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1051 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1052 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1053 // CHECK:   store <4 x i16> [[VUZP1_I]], ptr [[TMP4]]
1054 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1055 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1056 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1057 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1058 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1059 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Exercises vuzp_p16, which returns a two-vector poly16x4x2_t. The directives
// above expect bitcasts of a and b to <8 x i8>, then two <4 x i16>
// shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of a temporary
// result and mask <1, 3, 5, 7> stored one vector further in, before the struct
// is returned by value.
1060 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
1061   return vuzp_p16(a, b);
1062 }
1063 
1064 // CHECK-LABEL: @test_vuzpq_s8(
1065 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1066 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1067 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1068 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1069 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1070 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1071 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1072 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1073 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1074 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1075 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1076 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1077 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Exercises vuzpq_s8, which returns a two-vector int8x16x2_t (16-byte aligned
// in the expected IR). The directives above expect two <16 x i8>
// shufflevectors over a/b: the even indices <0, 2, ..., 30> stored at the
// start of a temporary result, and the odd indices <1, 3, ..., 31> stored one
// vector further in, before the struct is returned by value.
1078 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
1079   return vuzpq_s8(a, b);
1080 }
1081 
1082 // CHECK-LABEL: @test_vuzpq_s16(
1083 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1084 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1085 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1086 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1087 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1088 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1089 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1090 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1091 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1092 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1093 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1094 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1095 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1096 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1097 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Exercises vuzpq_s16, which returns a two-vector int16x8x2_t. The directives
// above expect bitcasts of a and b to <16 x i8>, then two <8 x i16>
// shufflevectors over a/b: the even indices <0, 2, ..., 14> stored at the
// start of a temporary result, and the odd indices <1, 3, ..., 15> stored one
// vector further in, before the struct is returned by value.
1098 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
1099   return vuzpq_s16(a, b);
1100 }
1101 
1102 // CHECK-LABEL: @test_vuzpq_s32(
1103 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1104 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1105 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1106 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1107 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1108 // CHECK:   store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
1109 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1110 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1111 // CHECK:   store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
1112 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1113 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1114 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1115 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1116 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1117 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Exercises vuzpq_s32, which returns a two-vector int32x4x2_t. The directives
// above expect bitcasts of a and b to <16 x i8>, then two <4 x i32>
// shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of a temporary
// result and mask <1, 3, 5, 7> stored one vector further in, before the struct
// is returned by value.
1118 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
1119   return vuzpq_s32(a, b);
1120 }
1121 
1122 // CHECK-LABEL: @test_vuzpq_u8(
1123 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1124 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1125 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1126 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1127 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1128 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1129 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1130 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1131 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1132 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1133 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1134 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1135 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Exercises vuzpq_u8, which returns a two-vector uint8x16x2_t. The directives
// above expect two <16 x i8> shufflevectors over a/b: the even indices
// <0, 2, ..., 30> stored at the start of a temporary result, and the odd
// indices <1, 3, ..., 31> stored one vector further in, before the struct is
// returned by value.
1136 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
1137   return vuzpq_u8(a, b);
1138 }
1139 
1140 // CHECK-LABEL: @test_vuzpq_u16(
1141 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1142 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1143 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1144 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1145 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1146 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1147 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1148 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1149 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1150 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1151 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1152 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1153 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1154 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1155 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Exercises vuzpq_u16, which returns a two-vector uint16x8x2_t. The directives
// above expect bitcasts of a and b to <16 x i8>, then two <8 x i16>
// shufflevectors over a/b: the even indices <0, 2, ..., 14> stored at the
// start of a temporary result, and the odd indices <1, 3, ..., 15> stored one
// vector further in, before the struct is returned by value.
1156 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
1157   return vuzpq_u16(a, b);
1158 }
1159 
1160 // CHECK-LABEL: @test_vuzpq_u32(
1161 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1162 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1163 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1164 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1165 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1166 // CHECK:   store <4 x i32> [[VUZP_I]], ptr [[RETVAL_I]]
1167 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1168 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1169 // CHECK:   store <4 x i32> [[VUZP1_I]], ptr [[TMP4]]
1170 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1171 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1172 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1173 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1174 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1175 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Exercises vuzpq_u32, which returns a two-vector uint32x4x2_t. The directives
// above expect bitcasts of a and b to <16 x i8>, then two <4 x i32>
// shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of a temporary
// result and mask <1, 3, 5, 7> stored one vector further in, before the struct
// is returned by value.
1176 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
1177   return vuzpq_u32(a, b);
1178 }
1179 
1180 // CHECK-LABEL: @test_vuzpq_f32(
1181 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1182 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1183 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1184 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1185 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1186 // CHECK:   store <4 x float> [[VUZP_I]], ptr [[RETVAL_I]]
1187 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1188 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1189 // CHECK:   store <4 x float> [[VUZP1_I]], ptr [[TMP4]]
1190 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1191 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1192 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1193 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1194 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1195 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Exercises vuzpq_f32, which returns a two-vector float32x4x2_t. The
// directives above expect bitcasts of a and b to <16 x i8>, then two
// <4 x float> shufflevectors over a/b: mask <0, 2, 4, 6> stored at the start of
// a temporary result and mask <1, 3, 5, 7> stored one vector further in, before
// the struct is returned by value.
1196 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
1197   return vuzpq_f32(a, b);
1198 }
1199 
1200 // CHECK-LABEL: @test_vuzpq_p8(
1201 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1202 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1203 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1204 // CHECK:   store <16 x i8> [[VUZP_I]], ptr [[RETVAL_I]]
1205 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1206 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1207 // CHECK:   store <16 x i8> [[VUZP1_I]], ptr [[TMP2]]
1208 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1209 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1210 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1211 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1212 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1213 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Exercises vuzpq_p8, which returns a two-vector poly8x16x2_t. The directives
// above expect two <16 x i8> shufflevectors over a/b: the even indices
// <0, 2, ..., 30> stored at the start of a temporary result, and the odd
// indices <1, 3, ..., 31> stored one vector further in, before the struct is
// returned by value.
1214 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
1215   return vuzpq_p8(a, b);
1216 }
1217 
1218 // CHECK-LABEL: @test_vuzpq_p16(
1219 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1220 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1221 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1222 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1223 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1224 // CHECK:   store <8 x i16> [[VUZP_I]], ptr [[RETVAL_I]]
1225 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1226 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1227 // CHECK:   store <8 x i16> [[VUZP1_I]], ptr [[TMP4]]
1228 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1229 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1230 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1231 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1232 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1233 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Codegen test for vuzpq_p16: forwards a and b to the intrinsic and returns
// its poly16x8x2_t result. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <8 x i16> shufflevectors: even indices
// 0..14 stored at RETVAL_I and odd indices 1..15 stored one vector past it.
// Keep the body a single call: the emitted IR is what is verified.
1234 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1235   return vuzpq_p16(a, b);
1236 }
1237 
1238 // CHECK-LABEL: @test_vzip_s8(
1239 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1240 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1241 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1242 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1243 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1244 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1245 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1246 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
1247 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1248 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1249 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1250 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
1251 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Codegen test for vzip_s8: forwards a and b to the intrinsic and returns the
// int8x8x2_t pair. The expectations above match two <8 x i8> shufflevectors:
// mask <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it, followed by the copy through RETVAL and the ret.
1252 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1253   return vzip_s8(a, b);
1254 }
1255 
1256 // CHECK-LABEL: @test_vzip_s16(
1257 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1258 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1259 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1260 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1261 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1262 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1263 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1264 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1265 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1266 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
1267 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1268 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1269 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1270 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
1271 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Codegen test for vzip_s16: forwards a and b to the intrinsic and returns
// the int16x4x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <4 x i16> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1272 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1273   return vzip_s16(a, b);
1274 }
1275 
1276 // CHECK-LABEL: @test_vzip_s32(
1277 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1278 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1279 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1280 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1281 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1282 // CHECK:   store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1283 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1284 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1285 // CHECK:   store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
1286 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
1287 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1288 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1289 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1290 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
1291 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Codegen test for vzip_s32: forwards a and b to the intrinsic and returns
// the int32x2x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <2 x i32> shufflevectors: mask <0,2>
// stored at RETVAL_I and mask <1,3> stored one vector past it.
1292 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1293   return vzip_s32(a, b);
1294 }
1295 
1296 // CHECK-LABEL: @test_vzip_u8(
1297 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1298 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1299 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1300 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1301 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1302 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1303 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1304 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
1305 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1306 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1307 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1308 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
1309 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Codegen test for vzip_u8: forwards a and b to the intrinsic and returns the
// uint8x8x2_t pair. The expectations above match two <8 x i8> shufflevectors:
// mask <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it, with the result returned as %struct.uint8x8x2_t.
1310 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1311   return vzip_u8(a, b);
1312 }
1313 
1314 // CHECK-LABEL: @test_vzip_u16(
1315 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1316 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1317 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1318 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1319 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1320 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1321 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1322 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1323 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1324 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
1325 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1326 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1327 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1328 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
1329 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Codegen test for vzip_u16: forwards a and b to the intrinsic and returns
// the uint16x4x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <4 x i16> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1330 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1331   return vzip_u16(a, b);
1332 }
1333 
1334 // CHECK-LABEL: @test_vzip_u32(
1335 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1336 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1337 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1338 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1339 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1340 // CHECK:   store <2 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1341 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1342 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1343 // CHECK:   store <2 x i32> [[VZIP1_I]], ptr [[TMP4]]
1344 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
1345 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1346 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1347 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1348 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1349 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Codegen test for vzip_u32: forwards a and b to the intrinsic and returns
// the uint32x2x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <2 x i32> shufflevectors: mask <0,2>
// stored at RETVAL_I and mask <1,3> stored one vector past it.
1350 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1351   return vzip_u32(a, b);
1352 }
1353 
1354 // CHECK-LABEL: @test_vzip_f32(
1355 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1356 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1357 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1358 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1359 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1360 // CHECK:   store <2 x float> [[VZIP_I]], ptr [[RETVAL_I]]
1361 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1362 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1363 // CHECK:   store <2 x float> [[VZIP1_I]], ptr [[TMP4]]
1364 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1365 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1366 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1367 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1368 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1369 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Codegen test for vzip_f32: forwards a and b to the intrinsic and returns
// the float32x2x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <2 x float> shufflevectors: mask <0,2>
// stored at RETVAL_I and mask <1,3> stored one vector past it.
1370 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1371   return vzip_f32(a, b);
1372 }
1373 
1374 // CHECK-LABEL: @test_vzip_p8(
1375 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1376 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1377 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1378 // CHECK:   store <8 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1379 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1380 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1381 // CHECK:   store <8 x i8> [[VZIP1_I]], ptr [[TMP2]]
1382 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1383 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1384 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1385 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1386 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1387 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Codegen test for vzip_p8: forwards a and b to the intrinsic and returns the
// poly8x8x2_t pair. The expectations above match two <8 x i8> shufflevectors:
// mask <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it, with the result returned as %struct.poly8x8x2_t.
1388 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1389   return vzip_p8(a, b);
1390 }
1391 
1392 // CHECK-LABEL: @test_vzip_p16(
1393 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1394 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1395 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1396 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1397 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1398 // CHECK:   store <4 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1399 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1400 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1401 // CHECK:   store <4 x i16> [[VZIP1_I]], ptr [[TMP4]]
1402 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1403 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1404 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1405 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1406 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1407 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Codegen test for vzip_p16: forwards a and b to the intrinsic and returns
// the poly16x4x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <4 x i16> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1408 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1409   return vzip_p16(a, b);
1410 }
1411 
1412 // CHECK-LABEL: @test_vzipq_s8(
1413 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1414 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1415 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1416 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1417 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1418 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1419 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1420 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1421 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1422 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1423 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1424 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1425 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Codegen test for vzipq_s8: forwards a and b to the intrinsic and returns
// the int8x16x2_t pair. The expectations above match two <16 x i8>
// shufflevectors: the first interleaves indices 0..7 with 16..23 and is
// stored at RETVAL_I; the second interleaves 8..15 with 24..31 and is stored
// one vector past it. Both allocas are expected with 16-byte alignment.
1426 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1427   return vzipq_s8(a, b);
1428 }
1429 
1430 // CHECK-LABEL: @test_vzipq_s16(
1431 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1432 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1433 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1434 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1435 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1436 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1437 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1438 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1439 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1440 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1441 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1442 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1443 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1444 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1445 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Codegen test for vzipq_s16: forwards a and b to the intrinsic and returns
// the int16x8x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <8 x i16> shufflevectors: mask
// <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it.
1446 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1447   return vzipq_s16(a, b);
1448 }
1449 
1450 // CHECK-LABEL: @test_vzipq_s32(
1451 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1452 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1453 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1454 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1455 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1456 // CHECK:   store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1457 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1458 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1459 // CHECK:   store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
1460 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1461 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1462 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1463 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1464 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1465 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Codegen test for vzipq_s32: forwards a and b to the intrinsic and returns
// the int32x4x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <4 x i32> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1466 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1467   return vzipq_s32(a, b);
1468 }
1469 
1470 // CHECK-LABEL: @test_vzipq_u8(
1471 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1472 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1473 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1474 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1475 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1476 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1477 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1478 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1479 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1480 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1481 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1482 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1483 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Codegen test for vzipq_u8: forwards a and b to the intrinsic and returns
// the uint8x16x2_t pair. The expectations above match two <16 x i8>
// shufflevectors: the first interleaves indices 0..7 with 16..23 and is
// stored at RETVAL_I; the second interleaves 8..15 with 24..31 and is stored
// one vector past it.
1484 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1485   return vzipq_u8(a, b);
1486 }
1487 
1488 // CHECK-LABEL: @test_vzipq_u16(
1489 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1490 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1491 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1492 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1493 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1494 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1495 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1496 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1497 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1498 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1499 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1500 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1501 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1502 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1503 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Codegen test for vzipq_u16: forwards a and b to the intrinsic and returns
// the uint16x8x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <8 x i16> shufflevectors: mask
// <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it.
1504 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1505   return vzipq_u16(a, b);
1506 }
1507 
1508 // CHECK-LABEL: @test_vzipq_u32(
1509 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1510 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1511 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1512 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1513 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1514 // CHECK:   store <4 x i32> [[VZIP_I]], ptr [[RETVAL_I]]
1515 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1516 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1517 // CHECK:   store <4 x i32> [[VZIP1_I]], ptr [[TMP4]]
1518 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1519 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1520 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1521 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1522 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1523 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Codegen test for vzipq_u32: forwards a and b to the intrinsic and returns
// the uint32x4x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <4 x i32> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1524 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1525   return vzipq_u32(a, b);
1526 }
1527 
1528 // CHECK-LABEL: @test_vzipq_f32(
1529 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1530 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1531 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1532 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1533 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1534 // CHECK:   store <4 x float> [[VZIP_I]], ptr [[RETVAL_I]]
1535 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1536 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1537 // CHECK:   store <4 x float> [[VZIP1_I]], ptr [[TMP4]]
1538 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1539 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1540 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1541 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1542 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1543 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Codegen test for vzipq_f32: forwards a and b to the intrinsic and returns
// the float32x4x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <4 x float> shufflevectors: mask <0,4,1,5>
// stored at RETVAL_I and mask <2,6,3,7> stored one vector past it.
1544 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1545   return vzipq_f32(a, b);
1546 }
1547 
1548 // CHECK-LABEL: @test_vzipq_p8(
1549 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1550 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1551 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1552 // CHECK:   store <16 x i8> [[VZIP_I]], ptr [[RETVAL_I]]
1553 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1554 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1555 // CHECK:   store <16 x i8> [[VZIP1_I]], ptr [[TMP2]]
1556 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1557 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1558 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1559 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1560 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1561 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Codegen test for vzipq_p8: forwards a and b to the intrinsic and returns
// the poly8x16x2_t pair. The expectations above match two <16 x i8>
// shufflevectors: the first interleaves indices 0..7 with 16..23 and is
// stored at RETVAL_I; the second interleaves 8..15 with 24..31 and is stored
// one vector past it.
1562 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1563   return vzipq_p8(a, b);
1564 }
1565 
1566 // CHECK-LABEL: @test_vzipq_p16(
1567 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1568 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1569 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1570 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1571 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1572 // CHECK:   store <8 x i16> [[VZIP_I]], ptr [[RETVAL_I]]
1573 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1574 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1575 // CHECK:   store <8 x i16> [[VZIP1_I]], ptr [[TMP4]]
1576 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1577 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1578 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1579 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1580 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1581 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Codegen test for vzipq_p16: forwards a and b to the intrinsic and returns
// the poly16x8x2_t pair. The expectations above match bitcasts of both
// arguments to <16 x i8>, then two <8 x i16> shufflevectors: mask
// <0,8,1,9,2,10,3,11> stored at RETVAL_I and mask <4,12,5,13,6,14,7,15>
// stored one vector past it.
1582 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1583   return vzipq_p16(a, b);
1584 }
1585 
1586 // CHECK-LABEL: @test_vtrn_s8(
1587 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1588 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1589 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1590 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1591 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1592 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1593 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1594 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL_I]], align 8
1595 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1596 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1597 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1598 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8
1599 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Codegen test for vtrn_s8: forwards a and b to the intrinsic and returns the
// int8x8x2_t pair. The expectations above match two <8 x i8> shufflevectors:
// mask <0,8,2,10,4,12,6,14> stored at RETVAL_I and mask
// <1,9,3,11,5,13,7,15> stored one vector past it.
1600 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1601   return vtrn_s8(a, b);
1602 }
1603 
1604 // CHECK-LABEL: @test_vtrn_s16(
1605 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1606 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1607 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1608 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1609 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1610 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1611 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1612 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1613 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1614 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL_I]], align 8
1615 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1616 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1617 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1618 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8
1619 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Codegen test for vtrn_s16: forwards a and b to the intrinsic and returns
// the int16x4x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <4 x i16> shufflevectors: mask <0,4,2,6>
// stored at RETVAL_I and mask <1,5,3,7> stored one vector past it.
1620 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1621   return vtrn_s16(a, b);
1622 }
1623 
1624 // CHECK-LABEL: @test_vtrn_s32(
1625 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1626 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1627 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1628 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1629 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1630 // CHECK:   store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1631 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1632 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1633 // CHECK:   store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
1634 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL_I]], align 8
1635 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1636 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1637 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1638 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8
1639 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Codegen test for vtrn_s32: forwards a and b to the intrinsic and returns
// the int32x2x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <2 x i32> shufflevectors: mask <0,2>
// stored at RETVAL_I and mask <1,3> stored one vector past it.
1640 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1641   return vtrn_s32(a, b);
1642 }
1643 
1644 // CHECK-LABEL: @test_vtrn_u8(
1645 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1646 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1647 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1648 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1649 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1650 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1651 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1652 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL_I]], align 8
1653 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1654 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1655 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1656 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8
1657 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Codegen test for vtrn_u8: forwards a and b to the intrinsic and returns the
// uint8x8x2_t pair. The expectations above match two <8 x i8> shufflevectors:
// mask <0,8,2,10,4,12,6,14> stored at RETVAL_I and mask
// <1,9,3,11,5,13,7,15> stored one vector past it.
1658 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1659   return vtrn_u8(a, b);
1660 }
1661 
1662 // CHECK-LABEL: @test_vtrn_u16(
1663 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1664 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1665 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1666 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1667 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1668 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1669 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1670 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1671 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1672 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL_I]], align 8
1673 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1674 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1675 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1676 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8
1677 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Codegen test for vtrn_u16: forwards a and b to the intrinsic and returns
// the uint16x4x2_t pair. The expectations above match bitcasts of both
// arguments to <8 x i8>, then two <4 x i16> shufflevectors: mask <0,4,2,6>
// stored at RETVAL_I and mask <1,5,3,7> stored one vector past it.
1678 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1679   return vtrn_u16(a, b);
1680 }
1681 
// Exercises vtrn_u32 on two uint32x2_t operands. Expected IR: both operands
// are bitcast to <8 x i8>, then two shufflevectors are emitted. Mask <0, 2>
// (lane 0 of a, lane 0 of b) is stored at the start of RETVAL_I, and mask
// <1, 3> (lane 1 of each) is stored one vector past it. The pair is then
// copied into RETVAL and returned as uint32x2x2_t.
1682 // CHECK-LABEL: @test_vtrn_u32(
1683 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1684 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1685 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1686 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1687 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1688 // CHECK:   store <2 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1689 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, ptr [[RETVAL_I]], i32 1
1690 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1691 // CHECK:   store <2 x i32> [[VTRN1_I]], ptr [[TMP4]]
1692 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL_I]], align 8
1693 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1694 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1695 // CHECK:   store [2 x <2 x i32>] [[TMP9]], ptr [[TMP8]], align 8
1696 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8
1697 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
1698 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1699   return vtrn_u32(a, b);
1700 }
1701 
// Exercises vtrn_f32 on two float32x2_t operands. Expected IR: both operands
// are bitcast to <8 x i8>, then shuffled as <2 x float> with masks <0, 2> and
// <1, 3>. The two results are stored back to back in RETVAL_I, copied into
// RETVAL, and returned as float32x2x2_t.
1702 // CHECK-LABEL: @test_vtrn_f32(
1703 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1704 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1705 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1706 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1707 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1708 // CHECK:   store <2 x float> [[VTRN_I]], ptr [[RETVAL_I]]
1709 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, ptr [[RETVAL_I]], i32 1
1710 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1711 // CHECK:   store <2 x float> [[VTRN1_I]], ptr [[TMP4]]
1712 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL_I]], align 8
1713 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[RETVAL]], i32 0, i32 0
1714 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1715 // CHECK:   store [2 x <2 x float>] [[TMP9]], ptr [[TMP8]], align 8
1716 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8
1717 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
1718 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1719   return vtrn_f32(a, b);
1720 }
1721 
// Exercises vtrn_p8 on two poly8x8_t operands. No operand bitcasts are
// expected here; the shuffles act on <8 x i8> directly. The first mask
// interleaves the even-indexed lanes of a and b (0, 8, 2, 10, ...), the second
// the odd-indexed lanes (1, 9, 3, 11, ...). Both results are stored back to
// back in RETVAL_I, copied into RETVAL, and returned as poly8x8x2_t.
1722 // CHECK-LABEL: @test_vtrn_p8(
1723 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1724 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1725 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1726 // CHECK:   store <8 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1727 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, ptr [[RETVAL_I]], i32 1
1728 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1729 // CHECK:   store <8 x i8> [[VTRN1_I]], ptr [[TMP2]]
1730 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL_I]], align 8
1731 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1732 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1733 // CHECK:   store [2 x <8 x i8>] [[TMP7]], ptr [[TMP6]], align 8
1734 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8
1735 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
1736 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
1737   return vtrn_p8(a, b);
1738 }
1739 
// Exercises vtrn_p16 on two poly16x4_t operands. Expected IR: both operands
// are bitcast to <8 x i8>, then shuffled as <4 x i16> with masks <0, 4, 2, 6>
// (even-indexed lanes of a and b interleaved) and <1, 5, 3, 7> (odd-indexed
// lanes). The results are stored back to back in RETVAL_I, copied into RETVAL,
// and returned as poly16x4x2_t.
1740 // CHECK-LABEL: @test_vtrn_p16(
1741 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1742 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1743 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1744 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1745 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1746 // CHECK:   store <4 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1747 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, ptr [[RETVAL_I]], i32 1
1748 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1749 // CHECK:   store <4 x i16> [[VTRN1_I]], ptr [[TMP4]]
1750 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL_I]], align 8
1751 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1752 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1753 // CHECK:   store [2 x <4 x i16>] [[TMP9]], ptr [[TMP8]], align 8
1754 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8
1755 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
1756 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
1757   return vtrn_p16(a, b);
1758 }
1759 
// Exercises vtrnq_s8 on two int8x16_t operands. No operand bitcasts are
// expected; the shuffles act on <16 x i8> directly. The first mask interleaves
// the even-indexed lanes of a and b (0, 16, 2, 18, ...), the second the
// odd-indexed lanes (1, 17, 3, 19, ...). Both results are stored back to back
// in the 16-byte-aligned RETVAL_I, copied into RETVAL, and returned as
// int8x16x2_t.
1760 // CHECK-LABEL: @test_vtrnq_s8(
1761 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1762 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1763 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1764 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1765 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1766 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1767 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1768 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL_I]], align 16
1769 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1770 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1771 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1772 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16
1773 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
1774 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
1775   return vtrnq_s8(a, b);
1776 }
1777 
// Exercises vtrnq_s16 on two int16x8_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <8 x i16>. The first mask
// interleaves the even-indexed lanes of a and b (0, 8, 2, 10, ...), the second
// the odd-indexed lanes (1, 9, 3, 11, ...). The results are stored back to
// back in RETVAL_I, copied into RETVAL, and returned as int16x8x2_t.
1778 // CHECK-LABEL: @test_vtrnq_s16(
1779 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1780 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1781 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1782 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1783 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1784 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1785 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1786 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1787 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1788 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL_I]], align 16
1789 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1790 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1791 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1792 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16
1793 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
1794 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
1795   return vtrnq_s16(a, b);
1796 }
1797 
// Exercises vtrnq_s32 on two int32x4_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <4 x i32> with masks <0, 4, 2, 6>
// (even-indexed lanes of a and b interleaved) and <1, 5, 3, 7> (odd-indexed
// lanes). The results are stored back to back in RETVAL_I, copied into RETVAL,
// and returned as int32x4x2_t.
1798 // CHECK-LABEL: @test_vtrnq_s32(
1799 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1800 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1801 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1802 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1803 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1804 // CHECK:   store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1805 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1806 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1807 // CHECK:   store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
1808 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL_I]], align 16
1809 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1810 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1811 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1812 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16
1813 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
1814 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
1815   return vtrnq_s32(a, b);
1816 }
1817 
// Exercises vtrnq_u8 on two uint8x16_t operands. No operand bitcasts are
// expected; the shuffles act on <16 x i8> directly. The first mask interleaves
// the even-indexed lanes of a and b (0, 16, 2, 18, ...), the second the
// odd-indexed lanes (1, 17, 3, 19, ...). Both results are stored back to back
// in RETVAL_I, copied into RETVAL, and returned as uint8x16x2_t.
1818 // CHECK-LABEL: @test_vtrnq_u8(
1819 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1820 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1821 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1822 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1823 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1824 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1825 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1826 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL_I]], align 16
1827 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1828 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1829 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1830 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16
1831 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
1832 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
1833   return vtrnq_u8(a, b);
1834 }
1835 
// Exercises vtrnq_u16 on two uint16x8_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <8 x i16>. The first mask
// interleaves the even-indexed lanes of a and b (0, 8, 2, 10, ...), the second
// the odd-indexed lanes (1, 9, 3, 11, ...). The results are stored back to
// back in RETVAL_I, copied into RETVAL, and returned as uint16x8x2_t.
1836 // CHECK-LABEL: @test_vtrnq_u16(
1837 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1838 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1839 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1840 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1841 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1842 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1843 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1844 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1845 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1846 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL_I]], align 16
1847 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1848 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1849 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1850 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16
1851 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
1852 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
1853   return vtrnq_u16(a, b);
1854 }
1855 
// Exercises vtrnq_u32 on two uint32x4_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <4 x i32> with masks <0, 4, 2, 6>
// (even-indexed lanes of a and b interleaved) and <1, 5, 3, 7> (odd-indexed
// lanes). The results are stored back to back in RETVAL_I, copied into RETVAL,
// and returned as uint32x4x2_t.
1856 // CHECK-LABEL: @test_vtrnq_u32(
1857 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1858 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1859 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1860 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1861 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1862 // CHECK:   store <4 x i32> [[VTRN_I]], ptr [[RETVAL_I]]
1863 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, ptr [[RETVAL_I]], i32 1
1864 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1865 // CHECK:   store <4 x i32> [[VTRN1_I]], ptr [[TMP4]]
1866 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL_I]], align 16
1867 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1868 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1869 // CHECK:   store [2 x <4 x i32>] [[TMP9]], ptr [[TMP8]], align 16
1870 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16
1871 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
1872 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
1873   return vtrnq_u32(a, b);
1874 }
1875 
// Exercises vtrnq_f32 on two float32x4_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <4 x float> with masks
// <0, 4, 2, 6> (even-indexed lanes of a and b interleaved) and <1, 5, 3, 7>
// (odd-indexed lanes). The results are stored back to back in RETVAL_I, copied
// into RETVAL, and returned as float32x4x2_t.
1876 // CHECK-LABEL: @test_vtrnq_f32(
1877 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1878 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1879 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1880 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1881 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1882 // CHECK:   store <4 x float> [[VTRN_I]], ptr [[RETVAL_I]]
1883 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, ptr [[RETVAL_I]], i32 1
1884 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1885 // CHECK:   store <4 x float> [[VTRN1_I]], ptr [[TMP4]]
1886 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL_I]], align 16
1887 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[RETVAL]], i32 0, i32 0
1888 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1889 // CHECK:   store [2 x <4 x float>] [[TMP9]], ptr [[TMP8]], align 16
1890 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16
1891 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
1892 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
1893   return vtrnq_f32(a, b);
1894 }
1895 
// Exercises vtrnq_p8 on two poly8x16_t operands. No operand bitcasts are
// expected; the shuffles act on <16 x i8> directly. The first mask interleaves
// the even-indexed lanes of a and b (0, 16, 2, 18, ...), the second the
// odd-indexed lanes (1, 17, 3, 19, ...). Both results are stored back to back
// in RETVAL_I, copied into RETVAL, and returned as poly8x16x2_t.
1896 // CHECK-LABEL: @test_vtrnq_p8(
1897 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1898 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1899 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1900 // CHECK:   store <16 x i8> [[VTRN_I]], ptr [[RETVAL_I]]
1901 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, ptr [[RETVAL_I]], i32 1
1902 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1903 // CHECK:   store <16 x i8> [[VTRN1_I]], ptr [[TMP2]]
1904 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL_I]], align 16
1905 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[RETVAL]], i32 0, i32 0
1906 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1907 // CHECK:   store [2 x <16 x i8>] [[TMP7]], ptr [[TMP6]], align 16
1908 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16
1909 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
1910 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
1911   return vtrnq_p8(a, b);
1912 }
1913 
// Exercises vtrnq_p16 on two poly16x8_t operands. Expected IR: both operands
// are bitcast to <16 x i8>, then shuffled as <8 x i16>. The first mask
// interleaves the even-indexed lanes of a and b (0, 8, 2, 10, ...), the second
// the odd-indexed lanes (1, 9, 3, 11, ...). The results are stored back to
// back in RETVAL_I, copied into RETVAL, and returned as poly16x8x2_t.
1914 // CHECK-LABEL: @test_vtrnq_p16(
1915 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1916 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1917 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1918 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1919 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1920 // CHECK:   store <8 x i16> [[VTRN_I]], ptr [[RETVAL_I]]
1921 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, ptr [[RETVAL_I]], i32 1
1922 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1923 // CHECK:   store <8 x i16> [[VTRN1_I]], ptr [[TMP4]]
1924 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL_I]], align 16
1925 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[RETVAL]], i32 0, i32 0
1926 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1927 // CHECK:   store [2 x <8 x i16>] [[TMP9]], ptr [[TMP8]], align 16
1928 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16
1929 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
1930 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
1931   return vtrnq_p16(a, b);
1932 }
1933