; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

;
; SQABS (sve2_int_un_pred_arit)
;

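; MOVPRFX is a constructive prefix that copies a source register into the
; destination ahead of a destructive predicated instruction. The backend
; should emit it only when the incoming passthru value is dead, i.e. when
; the passthru is undef or the governing predicate is known to be all active.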
; Check movprfx is not inserted when dstReg == srcReg
define <vscale x 16 x i8> @sqabs_i8_dupreg(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sqabs_i8_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqabs z0.b, p0/m, z0.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is inserted when passthru is undef
define <vscale x 16 x i8> @sqabs_i8_undef(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is inserted when the predicate is all active, making the passthru dead
define <vscale x 16 x i8> @sqabs_i8_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}

; Check movprfx is not inserted when the predicate is not all active, keeping the passthru live
define <vscale x 16 x i8> @sqabs_i8_not_active(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: sqabs_i8_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %ret = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg.to, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %ret
}
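
; The same four scenarios (dupreg, undef passthru, all-active predicate,
; partially-active predicate) are repeated below for the 16-, 32- and
; 64-bit element forms of SQABS.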

define <vscale x 8 x i16> @sqabs_i16_dupreg(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sqabs_i16_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_undef(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 8 x i16> @sqabs_i16_not_active(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: sqabs_i16_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %ret
}

define <vscale x 4 x i32> @sqabs_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sqabs_i32_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @sqabs_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: sqabs_i32_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 2 x i64> @sqabs_i64_dupreg(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sqabs_i64_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sqabs z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %ret
}

define <vscale x 2 x i64> @sqabs_i64_undef(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

define <vscale x 2 x i64> @sqabs_i64_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
; CHECK-LABEL: sqabs_i64_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

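; Here the predicate arrives as a function argument, so the compiler cannot
; prove it is all active and must preserve the passthru already held in z0.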
define <vscale x 2 x i64> @sqabs_i64_not_active(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: sqabs_i64_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %ret = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %ret
}

;
; URECPE (sve2_int_un_pred_arit_s)
;
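; URECPE only has a 32-bit element form (hence the _s instruction class), so
; just the <vscale x 4 x i32> variants of the scenarios above are tested.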

define <vscale x 4 x i32> @urecpe_i32_dupreg(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: urecpe_i32_dupreg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    urecpe z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_undef(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

define <vscale x 4 x i32> @urecpe_i32_not_active(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: urecpe_i32_not_active:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    urecpe z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
  %ret = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %ret
}

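; Intrinsic declarations for the tests above.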
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)

attributes #0 = { nounwind "target-features"="+sve2" }