xref: /llvm-project/llvm/test/CodeGen/AArch64/bitcast.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
6
; Builds the <2 x i32> value <58712, %a[0]>, reinterprets it as <4 x i16>, and
; returns a vector whose lane 0 is i16 element 1 of that value (the remaining
; lanes are undef). The default llc run is expected to return an all-zero
; register; the -global-isel run is expected to materialise the constant and
; then apply zip1 and rev32.
; NOTE(review) the zero result presumably follows from 58712 (0xe558) fitting
; in 16 bits on this little-endian target - confirm.
7define <4 x i16> @foo1(<2 x i32> %a) {
8; CHECK-SD-LABEL: foo1:
9; CHECK-SD:       // %bb.0:
10; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
11; CHECK-SD-NEXT:    ret
12;
13; CHECK-GI-LABEL: foo1:
14; CHECK-GI:       // %bb.0:
15; CHECK-GI-NEXT:    mov w8, #58712 // =0xe558
16; CHECK-GI-NEXT:    mov v1.s[0], w8
17; CHECK-GI-NEXT:    zip1 v0.2s, v1.2s, v0.2s
18; CHECK-GI-NEXT:    rev32 v0.4h, v0.4h
19; CHECK-GI-NEXT:    ret
20  %1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
21; Can't optimize the following bitcast to scalar_to_vector.
22  %2 = bitcast <2 x i32> %1 to <4 x i16>
23  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
24  ret <4 x i16> %3
25}
26
; Same shape as foo1 but with the constant 712 (0x2c8) in place of 58712. The
; function builds <712, %a[0]>, reinterprets it as <4 x i16>, and returns a
; vector whose lane 0 is i16 element 1 of that value. The default llc run is
; expected to return an all-zero register; the -global-isel run is expected to
; materialise the constant and then apply zip1 and rev32.
27define <4 x i16> @foo2(<2 x i32> %a) {
28; CHECK-SD-LABEL: foo2:
29; CHECK-SD:       // %bb.0:
30; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
31; CHECK-SD-NEXT:    ret
32;
33; CHECK-GI-LABEL: foo2:
34; CHECK-GI:       // %bb.0:
35; CHECK-GI-NEXT:    mov w8, #712 // =0x2c8
36; CHECK-GI-NEXT:    mov v1.s[0], w8
37; CHECK-GI-NEXT:    zip1 v0.2s, v1.2s, v0.2s
38; CHECK-GI-NEXT:    rev32 v0.4h, v0.4h
39; CHECK-GI-NEXT:    ret
40  %1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
41; Can't optimize the following bitcast to scalar_to_vector.
42  %2 = bitcast <2 x i32> %1 to <4 x i16>
43  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
44  ret <4 x i16> %3
45}
46
47; ===== To and From Scalar Types =====
48
; Adds two <4 x i8> vectors and bitcasts the sum to i32. Both llc runs expect
; add, uzp1 and fmov. The default run's expectation also reserves and releases
; 16 bytes of stack that none of the checked instructions access.
49define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
50; CHECK-SD-LABEL: bitcast_v4i8_i32:
51; CHECK-SD:       // %bb.0:
52; CHECK-SD-NEXT:    sub sp, sp, #16
53; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
54; CHECK-SD-NEXT:    add v0.4h, v0.4h, v1.4h
55; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
56; CHECK-SD-NEXT:    fmov w0, s0
57; CHECK-SD-NEXT:    add sp, sp, #16
58; CHECK-SD-NEXT:    ret
59;
60; CHECK-GI-LABEL: bitcast_v4i8_i32:
61; CHECK-GI:       // %bb.0:
62; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
63; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
64; CHECK-GI-NEXT:    fmov w0, s0
65; CHECK-GI-NEXT:    ret
66  %c = add <4 x i8> %a, %b
67  %d = bitcast <4 x i8> %c to i32
68  ret i32 %d
69}
70
; Adds two i32 scalars and bitcasts the sum to <4 x i8>. The default llc run
; expects a single zip1 after the fmov. The -global-isel run expects bytes 1-3
; to be extracted and inserted into halfword lanes one at a time.
71define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
72; CHECK-SD-LABEL: bitcast_i32_v4i8:
73; CHECK-SD:       // %bb.0:
74; CHECK-SD-NEXT:    add w8, w0, w1
75; CHECK-SD-NEXT:    fmov s0, w8
76; CHECK-SD-NEXT:    zip1 v0.8b, v0.8b, v0.8b
77; CHECK-SD-NEXT:    ret
78;
79; CHECK-GI-LABEL: bitcast_i32_v4i8:
80; CHECK-GI:       // %bb.0:
81; CHECK-GI-NEXT:    add w8, w0, w1
82; CHECK-GI-NEXT:    fmov s0, w8
83; CHECK-GI-NEXT:    mov b1, v0.b[1]
84; CHECK-GI-NEXT:    mov b2, v0.b[2]
85; CHECK-GI-NEXT:    fmov w8, s1
86; CHECK-GI-NEXT:    mov b1, v0.b[3]
87; CHECK-GI-NEXT:    mov v0.h[1], w8
88; CHECK-GI-NEXT:    fmov w8, s2
89; CHECK-GI-NEXT:    mov v0.h[2], w8
90; CHECK-GI-NEXT:    fmov w8, s1
91; CHECK-GI-NEXT:    mov v0.h[3], w8
92; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
93; CHECK-GI-NEXT:    ret
94  %c = add i32 %a, %b
95  %d = bitcast i32 %c to <4 x i8>
96  ret <4 x i8> %d
97}
98
; Adds two <2 x i16> vectors and bitcasts the sum to i32. The default llc run
; expects the two lanes to be stored as halfwords into a stack slot and
; reloaded as one word. The -global-isel run expects uzp1 plus fmov with no
; stack traffic.
99define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
100; CHECK-SD-LABEL: bitcast_v2i16_i32:
101; CHECK-SD:       // %bb.0:
102; CHECK-SD-NEXT:    sub sp, sp, #16
103; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
104; CHECK-SD-NEXT:    add v0.2s, v0.2s, v1.2s
105; CHECK-SD-NEXT:    mov w8, v0.s[1]
106; CHECK-SD-NEXT:    fmov w9, s0
107; CHECK-SD-NEXT:    strh w9, [sp, #12]
108; CHECK-SD-NEXT:    strh w8, [sp, #14]
109; CHECK-SD-NEXT:    ldr w0, [sp, #12]
110; CHECK-SD-NEXT:    add sp, sp, #16
111; CHECK-SD-NEXT:    ret
112;
113; CHECK-GI-LABEL: bitcast_v2i16_i32:
114; CHECK-GI:       // %bb.0:
115; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
116; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
117; CHECK-GI-NEXT:    fmov w0, s0
118; CHECK-GI-NEXT:    ret
119  %c = add <2 x i16> %a, %b
120  %d = bitcast <2 x i16> %c to i32
121  ret i32 %d
122}
123
; Adds two i32 scalars and bitcasts the sum to <2 x i16>. The default llc run
; expects a ushll after the fmov. The -global-isel run expects the two
; halfwords to be moved into the two 32-bit lanes individually.
124define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
125; CHECK-SD-LABEL: bitcast_i32_v2i16:
126; CHECK-SD:       // %bb.0:
127; CHECK-SD-NEXT:    add w8, w0, w1
128; CHECK-SD-NEXT:    fmov s0, w8
129; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
130; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
131; CHECK-SD-NEXT:    ret
132;
133; CHECK-GI-LABEL: bitcast_i32_v2i16:
134; CHECK-GI:       // %bb.0:
135; CHECK-GI-NEXT:    add w8, w0, w1
136; CHECK-GI-NEXT:    fmov s0, w8
137; CHECK-GI-NEXT:    mov h1, v0.h[1]
138; CHECK-GI-NEXT:    mov v0.s[0], w8
139; CHECK-GI-NEXT:    fmov w8, s1
140; CHECK-GI-NEXT:    mov v0.s[1], w8
141; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
142; CHECK-GI-NEXT:    ret
143  %c = add i32 %a, %b
144  %d = bitcast i32 %c to <2 x i16>
145  ret <2 x i16> %d
146}
147
; Adds two <8 x i8> vectors and bitcasts the sum to i64. Both llc runs share
; one expectation, the vector add followed by an fmov from d0 to x0.
148define i64 @bitcast_v8i8_i64(<8 x i8> %a, <8 x i8> %b){
149; CHECK-LABEL: bitcast_v8i8_i64:
150; CHECK:       // %bb.0:
151; CHECK-NEXT:    add v0.8b, v0.8b, v1.8b
152; CHECK-NEXT:    fmov x0, d0
153; CHECK-NEXT:    ret
154  %c = add <8 x i8> %a, %b
155  %d = bitcast <8 x i8> %c to i64
156  ret i64 %d
157}
158
; Adds two i64 scalars and bitcasts the sum to <8 x i8>. Both llc runs share
; one expectation, the integer add followed by an fmov from x8 to d0.
159define <8 x i8> @bitcast_i64_v8i8(i64 %a, i64 %b){
160; CHECK-LABEL: bitcast_i64_v8i8:
161; CHECK:       // %bb.0:
162; CHECK-NEXT:    add x8, x0, x1
163; CHECK-NEXT:    fmov d0, x8
164; CHECK-NEXT:    ret
165  %c = add i64 %a, %b
166  %d = bitcast i64 %c to <8 x i8>
167  ret <8 x i8> %d
168}
169
; Adds two <4 x i16> vectors and bitcasts the sum to i64. Both llc runs share
; one expectation, the vector add followed by an fmov from d0 to x0.
170define i64 @bitcast_v4i16_i64(<4 x i16> %a, <4 x i16> %b){
171; CHECK-LABEL: bitcast_v4i16_i64:
172; CHECK:       // %bb.0:
173; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
174; CHECK-NEXT:    fmov x0, d0
175; CHECK-NEXT:    ret
176  %c = add <4 x i16> %a, %b
177  %d = bitcast <4 x i16> %c to i64
178  ret i64 %d
179}
180
; Adds two i64 scalars and bitcasts the sum to <4 x i16>. Both llc runs share
; one expectation, the integer add followed by an fmov from x8 to d0.
181define <4 x i16> @bitcast_i64_v4i16(i64 %a, i64 %b){
182; CHECK-LABEL: bitcast_i64_v4i16:
183; CHECK:       // %bb.0:
184; CHECK-NEXT:    add x8, x0, x1
185; CHECK-NEXT:    fmov d0, x8
186; CHECK-NEXT:    ret
187  %c = add i64 %a, %b
188  %d = bitcast i64 %c to <4 x i16>
189  ret <4 x i16> %d
190}
191
; Adds two <2 x i32> vectors and bitcasts the sum to i64. Both llc runs share
; one expectation, the vector add followed by an fmov from d0 to x0.
192define i64 @bitcast_v2i32_i64(<2 x i32> %a, <2 x i32> %b){
193; CHECK-LABEL: bitcast_v2i32_i64:
194; CHECK:       // %bb.0:
195; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
196; CHECK-NEXT:    fmov x0, d0
197; CHECK-NEXT:    ret
198  %c = add <2 x i32> %a, %b
199  %d = bitcast <2 x i32> %c to i64
200  ret i64 %d
201}
202
; Adds two i64 scalars and bitcasts the sum to <2 x i32>. Both llc runs share
; one expectation, the integer add followed by an fmov from x8 to d0.
203define <2 x i32> @bitcast_i64_v2i32(i64 %a, i64 %b){
204; CHECK-LABEL: bitcast_i64_v2i32:
205; CHECK:       // %bb.0:
206; CHECK-NEXT:    add x8, x0, x1
207; CHECK-NEXT:    fmov d0, x8
208; CHECK-NEXT:    ret
209  %c = add i64 %a, %b
210  %d = bitcast i64 %c to <2 x i32>
211  ret <2 x i32> %d
212}
213
214; ===== Legal Vector Types =====
215
; Adds two <2 x i32> vectors and bitcasts the sum to <4 x i16>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
216define <4 x i16> @bitcast_v2i32_v4i16(<2 x i32> %a, <2 x i32> %b){
217; CHECK-LABEL: bitcast_v2i32_v4i16:
218; CHECK:       // %bb.0:
219; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
220; CHECK-NEXT:    ret
221  %c = add <2 x i32> %a, %b
222  %d = bitcast <2 x i32> %c to <4 x i16>
223  ret <4 x i16> %d
224}
225
; Adds two <2 x i64> vectors and bitcasts the sum to <4 x i32>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
226define <4 x i32> @bitcast_v2i64_v4i32(<2 x i64> %a, <2 x i64> %b){
227; CHECK-LABEL: bitcast_v2i64_v4i32:
228; CHECK:       // %bb.0:
229; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
230; CHECK-NEXT:    ret
231  %c = add <2 x i64> %a, %b
232  %d = bitcast <2 x i64> %c to <4 x i32>
233  ret <4 x i32> %d
234}
235
; Adds two <2 x i32> vectors and bitcasts the sum to <8 x i8>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
236define <8 x i8> @bitcast_v2i32_v8i8(<2 x i32> %a, <2 x i32> %b){
237; CHECK-LABEL: bitcast_v2i32_v8i8:
238; CHECK:       // %bb.0:
239; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
240; CHECK-NEXT:    ret
241  %c = add <2 x i32> %a, %b
242  %d = bitcast <2 x i32> %c to <8 x i8>
243  ret <8 x i8> %d
244}
245
; Adds two <2 x i64> vectors and bitcasts the sum to <8 x i16>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
246define <8 x i16> @bitcast_v2i64_v8i16(<2 x i64> %a, <2 x i64> %b){
247; CHECK-LABEL: bitcast_v2i64_v8i16:
248; CHECK:       // %bb.0:
249; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
250; CHECK-NEXT:    ret
251  %c = add <2 x i64> %a, %b
252  %d = bitcast <2 x i64> %c to <8 x i16>
253  ret <8 x i16> %d
254}
255
; Adds two <2 x i64> vectors and bitcasts the sum to <16 x i8>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
256define <16 x i8> @bitcast_v2i64_v16i8(<2 x i64> %a, <2 x i64> %b){
257; CHECK-LABEL: bitcast_v2i64_v16i8:
258; CHECK:       // %bb.0:
259; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
260; CHECK-NEXT:    ret
261  %c = add <2 x i64> %a, %b
262  %d = bitcast <2 x i64> %c to <16 x i8>
263  ret <16 x i8> %d
264}
265
; Adds two <4 x i16> vectors and bitcasts the sum to <2 x i32>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
266define <2 x i32> @bitcast_v4i16_v2i32(<4 x i16> %a, <4 x i16> %b){
267; CHECK-LABEL: bitcast_v4i16_v2i32:
268; CHECK:       // %bb.0:
269; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
270; CHECK-NEXT:    ret
271  %c = add <4 x i16> %a, %b
272  %d = bitcast <4 x i16> %c to <2 x i32>
273  ret <2 x i32> %d
274}
275
; Adds two <4 x i32> vectors and bitcasts the sum to <2 x i64>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
276define <2 x i64> @bitcast_v4i32_v2i64(<4 x i32> %a, <4 x i32> %b){
277; CHECK-LABEL: bitcast_v4i32_v2i64:
278; CHECK:       // %bb.0:
279; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
280; CHECK-NEXT:    ret
281  %c = add <4 x i32> %a, %b
282  %d = bitcast <4 x i32> %c to <2 x i64>
283  ret <2 x i64> %d
284}
285
; Adds two <4 x i16> vectors and bitcasts the sum to <8 x i8>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
286define <8 x i8> @bitcast_v4i16_v8i8(<4 x i16> %a, <4 x i16> %b){
287; CHECK-LABEL: bitcast_v4i16_v8i8:
288; CHECK:       // %bb.0:
289; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
290; CHECK-NEXT:    ret
291  %c = add <4 x i16> %a, %b
292  %d = bitcast <4 x i16> %c to <8 x i8>
293  ret <8 x i8> %d
294}
295
; Adds two <4 x i32> vectors and bitcasts the sum to <8 x i16>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
296define <8 x i16> @bitcast_v4i32_v8i16(<4 x i32> %a, <4 x i32> %b){
297; CHECK-LABEL: bitcast_v4i32_v8i16:
298; CHECK:       // %bb.0:
299; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
300; CHECK-NEXT:    ret
301  %c = add <4 x i32> %a, %b
302  %d = bitcast <4 x i32> %c to <8 x i16>
303  ret <8 x i16> %d
304}
305
; Adds two <4 x i32> vectors and bitcasts the sum to <16 x i8>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
306define <16 x i8> @bitcast_v4i32_v16i8(<4 x i32> %a, <4 x i32> %b){
307; CHECK-LABEL: bitcast_v4i32_v16i8:
308; CHECK:       // %bb.0:
309; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
310; CHECK-NEXT:    ret
311  %c = add <4 x i32> %a, %b
312  %d = bitcast <4 x i32> %c to <16 x i8>
313  ret <16 x i8> %d
314}
315
; Adds two <8 x i8> vectors and bitcasts the sum to <2 x i32>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
316define <2 x i32> @bitcast_v8i8_v2i32(<8 x i8> %a, <8 x i8> %b){
317; CHECK-LABEL: bitcast_v8i8_v2i32:
318; CHECK:       // %bb.0:
319; CHECK-NEXT:    add v0.8b, v0.8b, v1.8b
320; CHECK-NEXT:    ret
321  %c = add <8 x i8> %a, %b
322  %d = bitcast <8 x i8> %c to <2 x i32>
323  ret <2 x i32> %d
324}
325
; Adds two <8 x i16> vectors and bitcasts the sum to <2 x i64>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
326define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a, <8 x i16> %b){
327; CHECK-LABEL: bitcast_v8i16_v2i64:
328; CHECK:       // %bb.0:
329; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
330; CHECK-NEXT:    ret
331  %c = add <8 x i16> %a, %b
332  %d = bitcast <8 x i16> %c to <2 x i64>
333  ret <2 x i64> %d
334}
335
; Adds two <8 x i8> vectors and bitcasts the sum to <4 x i16>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
336define <4 x i16> @bitcast_v8i8_v4i16(<8 x i8> %a, <8 x i8> %b){
337; CHECK-LABEL: bitcast_v8i8_v4i16:
338; CHECK:       // %bb.0:
339; CHECK-NEXT:    add v0.8b, v0.8b, v1.8b
340; CHECK-NEXT:    ret
341  %c = add <8 x i8> %a, %b
342  %d = bitcast <8 x i8> %c to <4 x i16>
343  ret <4 x i16> %d
344}
345
; Adds two <8 x i16> vectors and bitcasts the sum to <4 x i32>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
346define <4 x i32> @bitcast_v8i16_v4i32(<8 x i16> %a, <8 x i16> %b){
347; CHECK-LABEL: bitcast_v8i16_v4i32:
348; CHECK:       // %bb.0:
349; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
350; CHECK-NEXT:    ret
351  %c = add <8 x i16> %a, %b
352  %d = bitcast <8 x i16> %c to <4 x i32>
353  ret <4 x i32> %d
354}
355
; Adds two <8 x i16> vectors and bitcasts the sum to <16 x i8>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
356define <16 x i8> @bitcast_v8i16_v16i8(<8 x i16> %a, <8 x i16> %b){
357; CHECK-LABEL: bitcast_v8i16_v16i8:
358; CHECK:       // %bb.0:
359; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
360; CHECK-NEXT:    ret
361  %c = add <8 x i16> %a, %b
362  %d = bitcast <8 x i16> %c to <16 x i8>
363  ret <16 x i8> %d
364}
365
; Adds two <16 x i8> vectors and bitcasts the sum to <2 x i64>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
366define <2 x i64> @bitcast_v16i8_v2i64(<16 x i8> %a, <16 x i8> %b){
367; CHECK-LABEL: bitcast_v16i8_v2i64:
368; CHECK:       // %bb.0:
369; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
370; CHECK-NEXT:    ret
371  %c = add <16 x i8> %a, %b
372  %d = bitcast <16 x i8> %c to <2 x i64>
373  ret <2 x i64> %d
374}
375
; Adds two <16 x i8> vectors and bitcasts the sum to <4 x i32>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
376define <4 x i32> @bitcast_v16i8_v4i32(<16 x i8> %a, <16 x i8> %b){
377; CHECK-LABEL: bitcast_v16i8_v4i32:
378; CHECK:       // %bb.0:
379; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
380; CHECK-NEXT:    ret
381  %c = add <16 x i8> %a, %b
382  %d = bitcast <16 x i8> %c to <4 x i32>
383  ret <4 x i32> %d
384}
385
; Adds two <16 x i8> vectors and bitcasts the sum to <8 x i16>. Both llc runs
; share one expectation, a single vector add with no instruction emitted for
; the bitcast.
386define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
387; CHECK-LABEL: bitcast_v16i8_v8i16:
388; CHECK:       // %bb.0:
389; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
390; CHECK-NEXT:    ret
391  %c = add <16 x i8> %a, %b
392  %d = bitcast <16 x i8> %c to <8 x i16>
393  ret <8 x i16> %d
394}
395
396; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
397
; Adds two <2 x i16> vectors and bitcasts the sum to <4 x i8>. The default llc
; run expects the two lanes to be stored as halfwords into a stack slot,
; reloaded as one 32-bit value and widened with ushll. The -global-isel run
; expects uzp1 followed by per-byte moves into halfword lanes, with no stack
; traffic.
398define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
399; CHECK-SD-LABEL: bitcast_v2i16_v4i8:
400; CHECK-SD:       // %bb.0:
401; CHECK-SD-NEXT:    sub sp, sp, #16
402; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
403; CHECK-SD-NEXT:    add v0.2s, v0.2s, v1.2s
404; CHECK-SD-NEXT:    mov w8, v0.s[1]
405; CHECK-SD-NEXT:    fmov w9, s0
406; CHECK-SD-NEXT:    strh w9, [sp, #12]
407; CHECK-SD-NEXT:    strh w8, [sp, #14]
408; CHECK-SD-NEXT:    ldr s0, [sp, #12]
409; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
410; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
411; CHECK-SD-NEXT:    add sp, sp, #16
412; CHECK-SD-NEXT:    ret
413;
414; CHECK-GI-LABEL: bitcast_v2i16_v4i8:
415; CHECK-GI:       // %bb.0:
416; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
417; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
418; CHECK-GI-NEXT:    mov b1, v0.b[1]
419; CHECK-GI-NEXT:    mov b2, v0.b[2]
420; CHECK-GI-NEXT:    fmov w8, s1
421; CHECK-GI-NEXT:    mov b1, v0.b[3]
422; CHECK-GI-NEXT:    mov v0.h[1], w8
423; CHECK-GI-NEXT:    fmov w8, s2
424; CHECK-GI-NEXT:    mov v0.h[2], w8
425; CHECK-GI-NEXT:    fmov w8, s1
426; CHECK-GI-NEXT:    mov v0.h[3], w8
427; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
428; CHECK-GI-NEXT:    ret
429  %c = add <2 x i16> %a, %b
430  %d = bitcast <2 x i16> %c to <4 x i8>
431  ret <4 x i8> %d
432}
433
; Adds two <4 x i8> vectors and bitcasts the sum to <2 x i16>. The default llc
; run expects the narrowed value to be stored to a stack slot and reloaded
; with two ld1 halfword lane loads. The -global-isel run expects uzp1 followed
; by register-only moves into the two 32-bit lanes.
434define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
435; CHECK-SD-LABEL: bitcast_v4i8_v2i16:
436; CHECK-SD:       // %bb.0:
437; CHECK-SD-NEXT:    sub sp, sp, #16
438; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
439; CHECK-SD-NEXT:    add v0.4h, v0.4h, v1.4h
440; CHECK-SD-NEXT:    add x8, sp, #12
441; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
442; CHECK-SD-NEXT:    str s0, [sp, #12]
443; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x8]
444; CHECK-SD-NEXT:    orr x8, x8, #0x2
445; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
446; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
447; CHECK-SD-NEXT:    add sp, sp, #16
448; CHECK-SD-NEXT:    ret
449;
450; CHECK-GI-LABEL: bitcast_v4i8_v2i16:
451; CHECK-GI:       // %bb.0:
452; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
453; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
454; CHECK-GI-NEXT:    mov h1, v0.h[1]
455; CHECK-GI-NEXT:    fmov w8, s0
456; CHECK-GI-NEXT:    mov v0.s[0], w8
457; CHECK-GI-NEXT:    fmov w8, s1
458; CHECK-GI-NEXT:    mov v0.s[1], w8
459; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
460; CHECK-GI-NEXT:    ret
461  %c = add <4 x i8> %a, %b
462  %d = bitcast <4 x i8> %c to <2 x i16>
463  ret <2 x i16> %d
464}
465
; Adds two <4 x i64> vectors and bitcasts the sum to <8 x i32>. Both llc runs
; expect the same pair of 128-bit adds and nothing else; the two runs differ
; only in the order of the adds.
466define <8 x i32> @bitcast_v4i64_v8i32(<4 x i64> %a, <4 x i64> %b){
467; CHECK-SD-LABEL: bitcast_v4i64_v8i32:
468; CHECK-SD:       // %bb.0:
469; CHECK-SD-NEXT:    add v1.2d, v1.2d, v3.2d
470; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
471; CHECK-SD-NEXT:    ret
472;
473; CHECK-GI-LABEL: bitcast_v4i64_v8i32:
474; CHECK-GI:       // %bb.0:
475; CHECK-GI-NEXT:    add v0.2d, v0.2d, v2.2d
476; CHECK-GI-NEXT:    add v1.2d, v1.2d, v3.2d
477; CHECK-GI-NEXT:    ret
478  %c = add <4 x i64> %a, %b
479  %d = bitcast <4 x i64> %c to <8 x i32>
480  ret <8 x i32> %d
481}
482
; Adds two <4 x i64> vectors and bitcasts the sum to <16 x i16>. Both llc runs
; expect the same pair of 128-bit adds and nothing else; the two runs differ
; only in the order of the adds.
483define <16 x i16> @bitcast_v4i64_v16i16(<4 x i64> %a, <4 x i64> %b){
484; CHECK-SD-LABEL: bitcast_v4i64_v16i16:
485; CHECK-SD:       // %bb.0:
486; CHECK-SD-NEXT:    add v1.2d, v1.2d, v3.2d
487; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
488; CHECK-SD-NEXT:    ret
489;
490; CHECK-GI-LABEL: bitcast_v4i64_v16i16:
491; CHECK-GI:       // %bb.0:
492; CHECK-GI-NEXT:    add v0.2d, v0.2d, v2.2d
493; CHECK-GI-NEXT:    add v1.2d, v1.2d, v3.2d
494; CHECK-GI-NEXT:    ret
495  %c = add <4 x i64> %a, %b
496  %d = bitcast <4 x i64> %c to <16 x i16>
497  ret <16 x i16> %d
498}
499
; Adds two <8 x i32> vectors and bitcasts the sum to <4 x i64>. The default
; llc run expects only the two 128-bit adds. The -global-isel run additionally
; expects the results to be rebuilt in v0 and v1 with one move per 64-bit
; lane.
500define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
501; CHECK-SD-LABEL: bitcast_v8i32_v4i64:
502; CHECK-SD:       // %bb.0:
503; CHECK-SD-NEXT:    add v1.4s, v1.4s, v3.4s
504; CHECK-SD-NEXT:    add v0.4s, v0.4s, v2.4s
505; CHECK-SD-NEXT:    ret
506;
507; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
508; CHECK-GI:       // %bb.0:
509; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
510; CHECK-GI-NEXT:    add v3.4s, v1.4s, v3.4s
511; CHECK-GI-NEXT:    mov x8, v2.d[1]
512; CHECK-GI-NEXT:    mov x9, v3.d[1]
513; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
514; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
515; CHECK-GI-NEXT:    mov v0.d[1], x8
516; CHECK-GI-NEXT:    mov v1.d[1], x9
517; CHECK-GI-NEXT:    ret
518  %c = add <8 x i32> %a, %b
519  %d = bitcast <8 x i32> %c to <4 x i64>
520  ret <4 x i64> %d
521}
522
; Adds two <8 x i32> vectors and bitcasts the sum to <16 x i16>. Both llc runs
; expect the same pair of 128-bit adds and nothing else; the two runs differ
; only in the order of the adds.
523define <16 x i16> @bitcast_v8i32_v16i16(<8 x i32> %a, <8 x i32> %b){
524; CHECK-SD-LABEL: bitcast_v8i32_v16i16:
525; CHECK-SD:       // %bb.0:
526; CHECK-SD-NEXT:    add v1.4s, v1.4s, v3.4s
527; CHECK-SD-NEXT:    add v0.4s, v0.4s, v2.4s
528; CHECK-SD-NEXT:    ret
529;
530; CHECK-GI-LABEL: bitcast_v8i32_v16i16:
531; CHECK-GI:       // %bb.0:
532; CHECK-GI-NEXT:    add v0.4s, v0.4s, v2.4s
533; CHECK-GI-NEXT:    add v1.4s, v1.4s, v3.4s
534; CHECK-GI-NEXT:    ret
535  %c = add <8 x i32> %a, %b
536  %d = bitcast <8 x i32> %c to <16 x i16>
537  ret <16 x i16> %d
538}
539
; Adds two <8 x i64> vectors and bitcasts the sum to <16 x i32>. Both llc runs
; expect the same four 128-bit adds and nothing else; the two runs differ only
; in the order of the adds.
540define <16 x i32> @bitcast_v8i64_v16i32(<8 x i64> %a, <8 x i64> %b){
541; CHECK-SD-LABEL: bitcast_v8i64_v16i32:
542; CHECK-SD:       // %bb.0:
543; CHECK-SD-NEXT:    add v2.2d, v2.2d, v6.2d
544; CHECK-SD-NEXT:    add v0.2d, v0.2d, v4.2d
545; CHECK-SD-NEXT:    add v1.2d, v1.2d, v5.2d
546; CHECK-SD-NEXT:    add v3.2d, v3.2d, v7.2d
547; CHECK-SD-NEXT:    ret
548;
549; CHECK-GI-LABEL: bitcast_v8i64_v16i32:
550; CHECK-GI:       // %bb.0:
551; CHECK-GI-NEXT:    add v0.2d, v0.2d, v4.2d
552; CHECK-GI-NEXT:    add v1.2d, v1.2d, v5.2d
553; CHECK-GI-NEXT:    add v2.2d, v2.2d, v6.2d
554; CHECK-GI-NEXT:    add v3.2d, v3.2d, v7.2d
555; CHECK-GI-NEXT:    ret
556  %c = add <8 x i64> %a, %b
557  %d = bitcast <8 x i64> %c to <16 x i32>
558  ret <16 x i32> %d
559}
560
; Adds two <16 x i16> vectors and bitcasts the sum to <4 x i64>. The default
; llc run expects only the two 128-bit adds. The -global-isel run additionally
; expects the results to be rebuilt in v0 and v1 with one move per 64-bit
; lane.
561define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
562; CHECK-SD-LABEL: bitcast_v16i16_v4i64:
563; CHECK-SD:       // %bb.0:
564; CHECK-SD-NEXT:    add v1.8h, v1.8h, v3.8h
565; CHECK-SD-NEXT:    add v0.8h, v0.8h, v2.8h
566; CHECK-SD-NEXT:    ret
567;
568; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
569; CHECK-GI:       // %bb.0:
570; CHECK-GI-NEXT:    add v2.8h, v0.8h, v2.8h
571; CHECK-GI-NEXT:    add v3.8h, v1.8h, v3.8h
572; CHECK-GI-NEXT:    mov x8, v2.d[1]
573; CHECK-GI-NEXT:    mov x9, v3.d[1]
574; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
575; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
576; CHECK-GI-NEXT:    mov v0.d[1], x8
577; CHECK-GI-NEXT:    mov v1.d[1], x9
578; CHECK-GI-NEXT:    ret
579  %c = add <16 x i16> %a, %b
580  %d = bitcast <16 x i16> %c to <4 x i64>
581  ret <4 x i64> %d
582}
583
; Adds two <16 x i16> vectors and bitcasts the sum to <8 x i32>. Both llc runs
; expect the same pair of 128-bit adds and nothing else; the two runs differ
; only in the order of the adds.
584define <8 x i32> @bitcast_v16i16_v8i32(<16 x i16> %a, <16 x i16> %b){
585; CHECK-SD-LABEL: bitcast_v16i16_v8i32:
586; CHECK-SD:       // %bb.0:
587; CHECK-SD-NEXT:    add v1.8h, v1.8h, v3.8h
588; CHECK-SD-NEXT:    add v0.8h, v0.8h, v2.8h
589; CHECK-SD-NEXT:    ret
590;
591; CHECK-GI-LABEL: bitcast_v16i16_v8i32:
592; CHECK-GI:       // %bb.0:
593; CHECK-GI-NEXT:    add v0.8h, v0.8h, v2.8h
594; CHECK-GI-NEXT:    add v1.8h, v1.8h, v3.8h
595; CHECK-GI-NEXT:    ret
596  %c = add <16 x i16> %a, %b
597  %d = bitcast <16 x i16> %c to <8 x i32>
598  ret <8 x i32> %d
599}
600
; Adds two <16 x i32> vectors and bitcasts the sum to <8 x i64>. The default
; llc run expects only the four 128-bit adds. The -global-isel run
; additionally expects the results to be rebuilt in v0-v3 with one move per
; 64-bit lane.
601define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
602; CHECK-SD-LABEL: bitcast_v16i32_v8i64:
603; CHECK-SD:       // %bb.0:
604; CHECK-SD-NEXT:    add v2.4s, v2.4s, v6.4s
605; CHECK-SD-NEXT:    add v0.4s, v0.4s, v4.4s
606; CHECK-SD-NEXT:    add v1.4s, v1.4s, v5.4s
607; CHECK-SD-NEXT:    add v3.4s, v3.4s, v7.4s
608; CHECK-SD-NEXT:    ret
609;
610; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
611; CHECK-GI:       // %bb.0:
612; CHECK-GI-NEXT:    add v4.4s, v0.4s, v4.4s
613; CHECK-GI-NEXT:    add v5.4s, v1.4s, v5.4s
614; CHECK-GI-NEXT:    add v6.4s, v2.4s, v6.4s
615; CHECK-GI-NEXT:    add v7.4s, v3.4s, v7.4s
616; CHECK-GI-NEXT:    mov x8, v4.d[1]
617; CHECK-GI-NEXT:    mov x9, v5.d[1]
618; CHECK-GI-NEXT:    mov x10, v6.d[1]
619; CHECK-GI-NEXT:    mov x11, v7.d[1]
620; CHECK-GI-NEXT:    mov v0.d[0], v4.d[0]
621; CHECK-GI-NEXT:    mov v1.d[0], v5.d[0]
622; CHECK-GI-NEXT:    mov v2.d[0], v6.d[0]
623; CHECK-GI-NEXT:    mov v3.d[0], v7.d[0]
624; CHECK-GI-NEXT:    mov v0.d[1], x8
625; CHECK-GI-NEXT:    mov v1.d[1], x9
626; CHECK-GI-NEXT:    mov v2.d[1], x10
627; CHECK-GI-NEXT:    mov v3.d[1], x11
628; CHECK-GI-NEXT:    ret
629  %c = add <16 x i32> %a, %b
630  %d = bitcast <16 x i32> %c to <8 x i64>
631  ret <8 x i64> %d
632}
633
634; ===== Vectors with Non-Pow 2 Widths =====
635
; Adds two <3 x i32> vectors and bitcasts the sum to <6 x i16>. Both llc runs
; share one expectation, a single four-lane 32-bit add with no instruction
; emitted for the bitcast.
636define <6 x i16> @bitcast_v3i32_v6i16(<3 x i32> %a, <3 x i32> %b){
637; CHECK-LABEL: bitcast_v3i32_v6i16:
638; CHECK:       // %bb.0:
639; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
640; CHECK-NEXT:    ret
641  %c = add <3 x i32> %a, %b
642  %d = bitcast <3 x i32> %c to <6 x i16>
643  ret <6 x i16> %d
644}
645