xref: /llvm-project/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple arm64-- | FileCheck %s
3
4target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
6; Test the (concat_vectors (trunc), (trunc)) pattern.
7
; Two <2 x i64> inputs are each narrowed to <2 x i16> and then joined into a
; single <4 x i16>. The assertions expect one uzp1 on .4s lanes followed by a
; single xtn down to .4h.
define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
entry:
  %lo = trunc <2 x i64> %a to <2 x i16>
  %hi = trunc <2 x i64> %b to <2 x i16>
  ; Identity mask 0..3: the result is %lo followed by %hi.
  %concat = shufflevector <2 x i16> %lo, <2 x i16> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %concat
}
20
; Two <2 x i64> inputs are each narrowed to <2 x i32> and then joined into a
; single <4 x i32>. The assertions expect nothing but one uzp1 on .4s lanes.
define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %lo = trunc <2 x i64> %a to <2 x i32>
  %hi = trunc <2 x i64> %b to <2 x i32>
  ; Identity mask 0..3: the result is %lo followed by %hi.
  %concat = shufflevector <2 x i32> %lo, <2 x i32> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %concat
}
32
; Two <2 x i32> inputs are each narrowed to <2 x i16> and then joined into a
; single <4 x i16>. The assertions expect nothing but one uzp1 on .4h lanes.
define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %lo = trunc <2 x i32> %a to <2 x i16>
  %hi = trunc <2 x i32> %b to <2 x i16>
  ; Identity mask 0..3: the result is %lo followed by %hi.
  %concat = shufflevector <2 x i16> %lo, <2 x i16> %hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %concat
}
44
; Two <4 x i32> inputs are each narrowed to <4 x i8> and then joined into a
; single <8 x i8>. The assertions expect one uzp1 on .8h lanes followed by a
; single xtn down to .8b.
define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    xtn v0.8b, v0.8h
; CHECK-NEXT:    ret
entry:
  %lo = trunc <4 x i32> %a to <4 x i8>
  %hi = trunc <4 x i32> %b to <4 x i8>
  ; Identity mask 0..7: the result is %lo followed by %hi.
  %concat = shufflevector <4 x i8> %lo, <4 x i8> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %concat
}
57
; Two <4 x i32> inputs are each narrowed to <4 x i16> and then joined into a
; single <8 x i16>. The assertions expect nothing but one uzp1 on .8h lanes.
define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %lo = trunc <4 x i32> %a to <4 x i16>
  %hi = trunc <4 x i32> %b to <4 x i16>
  ; Identity mask 0..7: the result is %lo followed by %hi.
  %concat = shufflevector <4 x i16> %lo, <4 x i16> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %concat
}
69
; Two <4 x i16> inputs are each narrowed to <4 x i8> and then joined into a
; single <8 x i8>. The assertions expect nothing but one uzp1 on .8b lanes.
define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %lo = trunc <4 x i16> %a to <4 x i8>
  %hi = trunc <4 x i16> %b to <4 x i8>
  ; Identity mask 0..7: the result is %lo followed by %hi.
  %concat = shufflevector <4 x i8> %lo, <4 x i8> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %concat
}
81
; Two <8 x i16> inputs are each narrowed to <8 x i8> and then joined into a
; single <16 x i8>. The assertions expect nothing but one uzp1 on .16b lanes.
define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 {
; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %lo = trunc <8 x i16> %a to <8 x i8>
  %hi = trunc <8 x i16> %b to <8 x i8>
  ; Identity mask 0..15: the result is %lo followed by %hi.
  %concat = shufflevector <8 x i8> %lo, <8 x i8> %hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %concat
}
93
; A constant splat of double 1.0 over <vscale x 4 x double> is narrowed to
; <vscale x 4 x float> and stored through %ptr. No concat_vectors appears in
; the IR: it is introduced when the fptrunc is split, because its
; <vscale x 4 x double> input operand is itself split.
define void @test_concat_fptrunc_v4f64_to_v4f32(ptr %ptr) #1 {
; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov z0.s, #1.00000000
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
entry:
  ; Insert 1.0 into lane 0, then broadcast it with an all-zero shuffle mask.
  %splat = shufflevector <vscale x 4 x double> insertelement (<vscale x 4 x double> poison, double 1.000000e+00, i32 0), <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %narrow = fptrunc <vscale x 4 x double> %splat to <vscale x 4 x float>
  store <vscale x 4 x float> %narrow, ptr %ptr, align 4
  ret void
}
109
110attributes #0 = { nounwind }
111attributes #1 = { "target-features"="+sve" }
112