; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"

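; The tests below exercise the combine that folds the cmpne-against-zero of a
; sign-extended predicate extract into plain punpklo(s) of the original cmpeq.
; In the positive tests no second compare or `and` survives the fold, while in
; most of the negative tests an `and` with the second predicate survives
; instead.

; Extracting the low nxv8i1 half of the nxv16i1 cmpeq maps onto a single
; punpklo.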
define <vscale x 8 x i1> @masked_load_sext_i8i16(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test ensures the combine does not fire when the two ptrues have different vls
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test ensures the combine does not fire unless both predicates are ptrues
define <vscale x 8 x i1> @masked_load_sext_i8i16_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

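; Extracting the low nxv4i1 quarter takes two punpklo steps.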
define <vscale x 4 x i1> @masked_load_sext_i8i32(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test ensures the combine does not fire when the two ptrues have different vls
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test ensures the combine does not fire unless both predicates are ptrues
define <vscale x 4 x i1> @masked_load_sext_i8i32_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

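; Extracting the low nxv2i1 eighth takes three punpklo steps.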
define <vscale x 2 x i1> @masked_load_sext_i8i64(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test ensures the combine does not fire when the two ptrues have different vls
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test ensures the combine does not fire unless both predicates are ptrues
define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}

; This negative test enforces that the ptrues have a specified vl
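; Note that no `and` survives here: with an unspecified vl the governing
; predicates are presumably known to be all-true, so the final cmpne folds
; away through a more general path even though this combine does not apply.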
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
declare <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)

declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

attributes #0 = { "target-features"="+sve" }