xref: /llvm-project/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s
3; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s
4; RUN: llc -mtriple=arm64-apple-ios -mattr=+global-isel -mattr=+sve -o - %s | FileCheck %s
5; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+global-isel -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s
6
; zext_of_concat: adds the <2 x i32> values loaded from %a and %b, widens the
; sum to <4 x i32> (upper two lanes unspecified), then interleaves it with
; zeros so the result lanes are { sum[0], 0, sum[1], 0 }. That vector is added
; to the <4 x i32> loaded from %c and stored back to %c. %d is not used.
; The arm64-apple-ios assertions expect the interleave-with-zero to be emitted
; as a single "ushll.2d ..., #0"; the aarch64_be assertions expect a zip1
; followed by a trn2 against a zeroed register.
7define void @zext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
8; CHECK-LABEL: zext_of_concat:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    ldr d0, [x0]
11; CHECK-NEXT:    ldr d1, [x1]
12; CHECK-NEXT:    add.2s v0, v0, v1
13; CHECK-NEXT:    ldr q1, [x2]
14; CHECK-NEXT:    ushll.2d v0, v0, #0
15; CHECK-NEXT:    add.4s v0, v0, v1
16; CHECK-NEXT:    str q0, [x2]
17; CHECK-NEXT:    ret
18;
19; CHECK-BE-LABEL: zext_of_concat:
20; CHECK-BE:       // %bb.0:
21; CHECK-BE-NEXT:    ld1 { v0.2s }, [x0]
22; CHECK-BE-NEXT:    ld1 { v1.2s }, [x1]
23; CHECK-BE-NEXT:    add v0.2s, v0.2s, v1.2s
24; CHECK-BE-NEXT:    movi v1.2d, #0000000000000000
25; CHECK-BE-NEXT:    zip1 v0.4s, v0.4s, v0.4s
26; CHECK-BE-NEXT:    trn2 v0.4s, v0.4s, v1.4s
27; CHECK-BE-NEXT:    ld1 { v1.4s }, [x2]
28; CHECK-BE-NEXT:    add v0.4s, v0.4s, v1.4s
29; CHECK-BE-NEXT:    st1 { v0.4s }, [x2]
30; CHECK-BE-NEXT:    ret
31  %i0.a = load <2 x i32>, ptr %a
32  %i0.b = load <2 x i32>, ptr %b
33  %i0 = add <2 x i32> %i0.a, %i0.b
  ; Widen %i0 to four lanes; lanes 2 and 3 are left undefined.
34  %i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; Mask indices 5 and 7 pick lanes of the all-zero second operand, so every
  ; low element of %i1 is followed by a zero lane.
35  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
36  %i3 = load <4 x i32>, ptr %c
37  %i4 = add <4 x i32> %i2, %i3
38  store <4 x i32> %i4, ptr %c
39  ret void
40}
41
; zext_of_concat_extrause: same interleave-with-zero shuffle as zext_of_concat,
; but the widened <4 x i32> is built by concatenating the <2 x i32> sum with
; itself and is also stored to %e, giving it a second use. The interleaved
; vector is added to the <4 x i32> loaded from %c and stored back to %c.
; %d is not used.
; Unlike zext_of_concat, the arm64-apple-ios assertions here expect the
; concatenation to be materialised (mov.d) and the interleave done with
; movi + zip1 rather than a ushll.
42define void @zext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
43; CHECK-LABEL: zext_of_concat_extrause:
44; CHECK:       ; %bb.0:
45; CHECK-NEXT:    ldr d0, [x1]
46; CHECK-NEXT:    ldr d1, [x0]
47; CHECK-NEXT:    add.2s v0, v1, v0
48; CHECK-NEXT:    movi.2d v1, #0000000000000000
49; CHECK-NEXT:    mov.d v0[1], v0[0]
50; CHECK-NEXT:    zip1.4s v1, v0, v1
51; CHECK-NEXT:    str q0, [x4]
52; CHECK-NEXT:    ldr q0, [x2]
53; CHECK-NEXT:    add.4s v0, v1, v0
54; CHECK-NEXT:    str q0, [x2]
55; CHECK-NEXT:    ret
56;
57; CHECK-BE-LABEL: zext_of_concat_extrause:
58; CHECK-BE:       // %bb.0:
59; CHECK-BE-NEXT:    ld1 { v0.2s }, [x1]
60; CHECK-BE-NEXT:    ld1 { v1.2s }, [x0]
61; CHECK-BE-NEXT:    movi v2.2d, #0000000000000000
62; CHECK-BE-NEXT:    add v0.2s, v1.2s, v0.2s
63; CHECK-BE-NEXT:    mov v0.d[1], v0.d[0]
64; CHECK-BE-NEXT:    zip1 v1.4s, v0.4s, v0.4s
65; CHECK-BE-NEXT:    st1 { v0.4s }, [x4]
66; CHECK-BE-NEXT:    trn2 v0.4s, v1.4s, v2.4s
67; CHECK-BE-NEXT:    ld1 { v1.4s }, [x2]
68; CHECK-BE-NEXT:    add v0.4s, v0.4s, v1.4s
69; CHECK-BE-NEXT:    st1 { v0.4s }, [x2]
70; CHECK-BE-NEXT:    ret
71  %i0.a = load <2 x i32>, ptr %a
72  %i0.b = load <2 x i32>, ptr %b
73  %i0 = add <2 x i32> %i0.a, %i0.b
  ; Concatenate %i0 with itself: lanes are { %i0[0], %i0[1], %i0[0], %i0[1] }.
74  %i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Extra use of the concatenated vector.
75  store <4 x i32> %i1, ptr %e
  ; Mask indices 5 and 7 pick lanes of the all-zero second operand.
76  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
77  %i3 = load <4 x i32>, ptr %c
78  %i4 = add <4 x i32> %i2, %i3
79  store <4 x i32> %i4, ptr %c
80  ret void
81}
82
; aext_of_concat: like zext_of_concat, except the second shuffle leaves the odd
; result lanes undefined instead of taking them from a zero vector, so the
; result lanes are { sum[0], undef, sum[1], undef } ("aext" presumably stands
; for any-extend). The result is added to the <4 x i32> loaded from %c and
; stored back to %c. %d is not used.
; The arm64-apple-ios assertions expect "ushll.2d ..., #0"; the aarch64_be
; assertions expect a single zip1 of the register with itself.
83define void @aext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
84; CHECK-LABEL: aext_of_concat:
85; CHECK:       ; %bb.0:
86; CHECK-NEXT:    ldr d0, [x0]
87; CHECK-NEXT:    ldr d1, [x1]
88; CHECK-NEXT:    add.2s v0, v0, v1
89; CHECK-NEXT:    ldr q1, [x2]
90; CHECK-NEXT:    ushll.2d v0, v0, #0
91; CHECK-NEXT:    add.4s v0, v0, v1
92; CHECK-NEXT:    str q0, [x2]
93; CHECK-NEXT:    ret
94;
95; CHECK-BE-LABEL: aext_of_concat:
96; CHECK-BE:       // %bb.0:
97; CHECK-BE-NEXT:    ld1 { v0.2s }, [x0]
98; CHECK-BE-NEXT:    ld1 { v1.2s }, [x1]
99; CHECK-BE-NEXT:    add v0.2s, v0.2s, v1.2s
100; CHECK-BE-NEXT:    ld1 { v1.4s }, [x2]
101; CHECK-BE-NEXT:    zip1 v0.4s, v0.4s, v0.4s
102; CHECK-BE-NEXT:    add v0.4s, v0.4s, v1.4s
103; CHECK-BE-NEXT:    st1 { v0.4s }, [x2]
104; CHECK-BE-NEXT:    ret
105  %i0.a = load <2 x i32>, ptr %a
106  %i0.b = load <2 x i32>, ptr %b
107  %i0 = add <2 x i32> %i0.a, %i0.b
  ; Widen %i0 to four lanes; lanes 2 and 3 are left undefined.
108  %i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ; Spread the two defined lanes to positions 0 and 2; lanes 1 and 3 are undef.
109  %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
110  %i3 = load <4 x i32>, ptr %c
111  %i4 = add <4 x i32> %i2, %i3
112  store <4 x i32> %i4, ptr %c
113  ret void
114}
115
; aext_of_concat_extrause: combines the two variations above. The widened
; <4 x i32> is the <2 x i32> sum concatenated with itself and is also stored
; to %e (extra use); the second shuffle then places its lanes 0 and 1 at
; positions 0 and 2 and leaves lanes 1 and 3 undefined. The result is added to
; the <4 x i32> loaded from %c and stored back to %c. %d is not used.
; Both sets of assertions expect the concatenation to be materialised (mov of
; the d[1] lane) for the store to %e and the spread to be done with a zip1 of
; the sum register with itself.
116define void @aext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
117; CHECK-LABEL: aext_of_concat_extrause:
118; CHECK:       ; %bb.0:
119; CHECK-NEXT:    ldr d0, [x1]
120; CHECK-NEXT:    ldr d1, [x0]
121; CHECK-NEXT:    add.2s v0, v1, v0
122; CHECK-NEXT:    mov.16b v1, v0
123; CHECK-NEXT:    mov.d v1[1], v0[0]
124; CHECK-NEXT:    zip1.4s v0, v0, v0
125; CHECK-NEXT:    str q1, [x4]
126; CHECK-NEXT:    ldr q1, [x2]
127; CHECK-NEXT:    add.4s v0, v0, v1
128; CHECK-NEXT:    str q0, [x2]
129; CHECK-NEXT:    ret
130;
131; CHECK-BE-LABEL: aext_of_concat_extrause:
132; CHECK-BE:       // %bb.0:
133; CHECK-BE-NEXT:    ld1 { v0.2s }, [x1]
134; CHECK-BE-NEXT:    ld1 { v1.2s }, [x0]
135; CHECK-BE-NEXT:    add v0.2s, v1.2s, v0.2s
136; CHECK-BE-NEXT:    mov v1.16b, v0.16b
137; CHECK-BE-NEXT:    mov v1.d[1], v0.d[0]
138; CHECK-BE-NEXT:    zip1 v0.4s, v0.4s, v0.4s
139; CHECK-BE-NEXT:    st1 { v1.4s }, [x4]
140; CHECK-BE-NEXT:    ld1 { v1.4s }, [x2]
141; CHECK-BE-NEXT:    add v0.4s, v0.4s, v1.4s
142; CHECK-BE-NEXT:    st1 { v0.4s }, [x2]
143; CHECK-BE-NEXT:    ret
144  %i0.a = load <2 x i32>, ptr %a
145  %i0.b = load <2 x i32>, ptr %b
146  %i0 = add <2 x i32> %i0.a, %i0.b
  ; Concatenate %i0 with itself: lanes are { %i0[0], %i0[1], %i0[0], %i0[1] }.
147  %i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Extra use of the concatenated vector.
148  store <4 x i32> %i1, ptr %e
  ; The mask only selects lanes 0 and 1 of %i1 (the rest are undef), so the
  ; zeroinitializer second operand is never read.
149  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
150  %i3 = load <4 x i32>, ptr %c
151  %i4 = add <4 x i32> %i2, %i3
152  store <4 x i32> %i4, ptr %c
153  ret void
154}
155