xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fcopysign.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Two llc configurations are exercised for AArch64 with SVE enabled: the first
; uses the default combiner settings, the second additionally passes
; --combiner-vector-fcopysign-extend-round. Assertions shared by both
; configurations use the common prefix; tests whose output differs between the
; two carry one assertion group per configuration.
2; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s
3; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s
; NOTE(review): the layout string below uses "m:o" while the triple on the
; command lines is plain "aarch64" - confirm this combination is intentional.
4target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
6;============ v2f32
7
; Baseline case: copysign where the magnitude operand (%a) and the sign operand
; (%b) are both <vscale x 2 x float>. The expected code keeps only the sign bit
; of z1 (mask 0x80000000), clears the sign bit of z0 (mask 0x7fffffff) and ORs
; the two registers together. Output is the same for both llc configurations.
8define <vscale x 2 x float> @test_copysign_v2f32_v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
9; CHECK-LABEL: test_copysign_v2f32_v2f32:
10; CHECK:       // %bb.0:
11; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
12; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
13; CHECK-NEXT:    orr z0.d, z0.d, z1.d
14; CHECK-NEXT:    ret
15  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
16  ret <vscale x 2 x float> %r
17}
18
; Sign operand is wider than the result: %b is <vscale x 2 x double> and is
; narrowed with fptrunc before feeding copysign on <vscale x 2 x float>.
; The expected code converts z1 from .d to .s under an all-true .d predicate,
; then applies the same sign-mask / magnitude-mask / ORR sequence as the
; same-type case. Output is the same for both llc configurations.
19define <vscale x 2 x float> @test_copysign_v2f32_v2f64(<vscale x 2 x float> %a, <vscale x 2 x double> %b) #0 {
20; CHECK-LABEL: test_copysign_v2f32_v2f64:
21; CHECK:       // %bb.0:
22; CHECK-NEXT:    ptrue p0.d
23; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
24; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
25; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
26; CHECK-NEXT:    orr z0.d, z0.d, z1.d
27; CHECK-NEXT:    ret
28  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
29  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
30  ret <vscale x 2 x float> %r
31}
32
; Declaration of the copysign intrinsic used by the <vscale x 2 x float> tests.
; NOTE(review): the name suffix is ".v2f32" although the overloaded type is the
; scalable <vscale x 2 x float>; presumably the IR reader accepts/remangles
; this - confirm whether ".nxv2f32" would be the canonical spelling.
33declare <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0
34
35;============ v4f32
36
; Same-type copysign on <vscale x 4 x float>. Expected code: isolate the sign
; bit of z1 (0x80000000), clear the sign bit of z0 (0x7fffffff), ORR the
; results. Output is the same for both llc configurations.
37define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
38; CHECK-LABEL: test_copysign_v4f32_v4f32:
39; CHECK:       // %bb.0:
40; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
41; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
42; CHECK-NEXT:    orr z0.d, z0.d, z1.d
43; CHECK-NEXT:    ret
44  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
45  ret <vscale x 4 x float> %r
46}
47
48; SplitVecOp #1
; (presumably: the sign operand, operand #1, is the one that has to be split
; during legalization - TODO confirm against the legalizer naming.)
; %b is <vscale x 4 x double>, which the assertions show arriving in two
; registers (z1 and z2); it is narrowed with fptrunc before copysign on
; <vscale x 4 x float>. The two llc configurations produce different code:
;  - default: both halves of %b are converted to .s, packed with uzp1, and a
;    single sign-mask / ORR is applied against z0;
;  - with --combiner-vector-fcopysign-extend-round: %a is unpacked into high
;    and low halves, each half gets its own mask / ORR against the matching
;    converted half of %b, and the two results are packed with uzp1 at the end.
49define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
50; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
51; CHECK-NO-EXTEND-ROUND:       // %bb.0:
52; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
53; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
54; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
55; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
56; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
57; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
58; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
59; CHECK-NO-EXTEND-ROUND-NEXT:    ret
60;
61; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
62; CHECK-EXTEND-ROUND:       // %bb.0:
63; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
64; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.d, z0.s
65; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.d, z0.s
66; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
67; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
68; CHECK-EXTEND-ROUND-NEXT:    and z3.s, z3.s, #0x7fffffff
69; CHECK-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
70; CHECK-EXTEND-ROUND-NEXT:    and z2.s, z2.s, #0x80000000
71; CHECK-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
72; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
73; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
74; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.s, z0.s, z2.s
75; CHECK-EXTEND-ROUND-NEXT:    ret
76  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
77  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
78  ret <vscale x 4 x float> %r
79}
80
; Declaration of the copysign intrinsic on <vscale x 4 x float>; called by the
; v4f32 tests and by test_copysign_nxv4f32_nxv4f16 further down in this file.
; NOTE(review): suffix is ".v4f32" although the type is scalable - confirm
; whether ".nxv4f32" would be the canonical spelling.
81declare <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0
82
83;============ v2f64
84
; Sign operand is narrower than the result: %b is <vscale x 2 x float> and is
; widened with fpext before feeding copysign on <vscale x 2 x double>.
; Expected code: convert z1 from .s to .d under an all-true .d predicate, keep
; only its sign bit (0x8000000000000000), clear the sign bit of z0
; (0x7fffffffffffffff) and ORR. Output is the same for both llc configurations.
; NOTE(review): the function name ends in "v232" whereas sibling tests follow a
; "v<N>f<bits>" pattern; this looks like a typo for "v2f32". Renaming would
; also require updating the label assertion below.
85define <vscale x 2 x double> @test_copysign_v2f64_v232(<vscale x 2 x double> %a, <vscale x 2 x float> %b) #0 {
86; CHECK-LABEL: test_copysign_v2f64_v232:
87; CHECK:       // %bb.0:
88; CHECK-NEXT:    ptrue p0.d
89; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
90; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
91; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
92; CHECK-NEXT:    orr z0.d, z0.d, z1.d
93; CHECK-NEXT:    ret
94  %tmp0 = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
95  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %tmp0)
96  ret <vscale x 2 x double> %r
97}
98
; Same-type copysign on <vscale x 2 x double>. Expected code: isolate the sign
; bit of z1 (0x8000000000000000), clear the sign bit of z0
; (0x7fffffffffffffff), ORR the results. Output is the same for both llc
; configurations.
99define <vscale x 2 x double> @test_copysign_v2f64_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
100; CHECK-LABEL: test_copysign_v2f64_v2f64:
101; CHECK:       // %bb.0:
102; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
103; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
104; CHECK-NEXT:    orr z0.d, z0.d, z1.d
105; CHECK-NEXT:    ret
106  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
107  ret <vscale x 2 x double> %r
108}
109
; Declaration of the copysign intrinsic on <vscale x 2 x double>; called by the
; v2f64 tests and by test_copysign_nxv2f64_nxv2f32 further down in this file.
; NOTE(review): suffix is ".v2f64" although the type is scalable - confirm
; whether ".nxv2f64" would be the canonical spelling.
110declare <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0
111
112;============ v4f64
113
114; SplitVecRes mismatched
; (presumably: the result has to be split during legalization and the sign
; operand has a different element type - TODO confirm against the legalizer
; naming.)
; The result and %a are <vscale x 4 x double>, which the assertions show
; occupying two registers (z0, z1); %b is <vscale x 4 x float> in z2 and is
; widened with fpext. Expected code: unpack z2 into low/high halves, convert
; each half from .s to .d, keep only the sign bits, clear the sign bits of z0
; and z1, and ORR each half pairwise. Output is the same for both llc
; configurations.
115define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
116; CHECK-LABEL: test_copysign_v4f64_v4f32:
117; CHECK:       // %bb.0:
118; CHECK-NEXT:    uunpklo z3.d, z2.s
119; CHECK-NEXT:    uunpkhi z2.d, z2.s
120; CHECK-NEXT:    ptrue p0.d
121; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
122; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
123; CHECK-NEXT:    fcvt z3.d, p0/m, z3.s
124; CHECK-NEXT:    fcvt z2.d, p0/m, z2.s
125; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
126; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
127; CHECK-NEXT:    orr z0.d, z0.d, z3.d
128; CHECK-NEXT:    orr z1.d, z1.d, z2.d
129; CHECK-NEXT:    ret
130  %tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
131  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
132  ret <vscale x 4 x double> %r
133}
134
135; SplitVecRes same
; (presumably: the result has to be split during legalization and both operands
; share its type - TODO confirm against the legalizer naming.)
; Both operands are <vscale x 4 x double>; the assertions show %a in z0/z1 and
; %b in z2/z3. Expected code: for each register pair, keep the sign bit of the
; %b half, clear the sign bit of the %a half, and ORR them. Output is the same
; for both llc configurations.
136define <vscale x 4 x double> @test_copysign_v4f64_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0 {
137; CHECK-LABEL: test_copysign_v4f64_v4f64:
138; CHECK:       // %bb.0:
139; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
140; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
141; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
142; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
143; CHECK-NEXT:    orr z0.d, z0.d, z2.d
144; CHECK-NEXT:    orr z1.d, z1.d, z3.d
145; CHECK-NEXT:    ret
146  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
147  ret <vscale x 4 x double> %r
148}
149
; Declaration of the copysign intrinsic used by the <vscale x 4 x double> tests.
; NOTE(review): suffix is ".v4f64" although the type is scalable - confirm
; whether ".nxv4f64" would be the canonical spelling.
150declare <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0
151
152;============ v4f16
153
; Same-type copysign on <vscale x 4 x half>. Expected code: isolate the sign
; bit of z1 (0x8000), clear the sign bit of z0 (0x7fff), ORR the results.
; Output is the same for both llc configurations.
154define <vscale x 4 x half> @test_copysign_v4f16_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
155; CHECK-LABEL: test_copysign_v4f16_v4f16:
156; CHECK:       // %bb.0:
157; CHECK-NEXT:    and z1.h, z1.h, #0x8000
158; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
159; CHECK-NEXT:    orr z0.d, z0.d, z1.d
160; CHECK-NEXT:    ret
161  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
162  ret <vscale x 4 x half> %r
163}
164
; Sign operand is wider than the result: %b is <vscale x 4 x float> and is
; narrowed with fptrunc before feeding copysign on <vscale x 4 x half>.
; Expected code: convert z1 from .s to .h under an all-true .s predicate, then
; sign-mask (0x8000), magnitude-mask (0x7fff) and ORR. Output is the same for
; both llc configurations.
165define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <vscale x 4 x float> %b) #0 {
166; CHECK-LABEL: test_copysign_v4f16_v4f32:
167; CHECK:       // %bb.0:
168; CHECK-NEXT:    ptrue p0.s
169; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
170; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
171; CHECK-NEXT:    and z1.h, z1.h, #0x8000
172; CHECK-NEXT:    orr z0.d, z0.d, z1.d
173; CHECK-NEXT:    ret
174  %tmp0 = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
175  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
176  ret <vscale x 4 x half> %r
177}
178
; %b is <vscale x 4 x double>, which the assertions show arriving in two
; registers (z1 and z2); it is narrowed with fptrunc straight to half before
; copysign on <vscale x 4 x half>. The two llc configurations differ:
;  - default: both halves of %b are converted from .d to .h, packed with uzp1
;    on .s elements, and a single sign-mask / ORR is applied against z0;
;  - with --combiner-vector-fcopysign-extend-round: %a is unpacked into high
;    and low halves, each half is masked and ORed with the matching converted
;    half of %b, and the results are packed with uzp1 at the end.
179define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
180; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
181; CHECK-NO-EXTEND-ROUND:       // %bb.0:
182; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
183; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
184; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
185; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
186; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
187; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
188; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
189; CHECK-NO-EXTEND-ROUND-NEXT:    ret
190;
191; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
192; CHECK-EXTEND-ROUND:       // %bb.0:
193; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
194; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.d, z0.s
195; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.d, z0.s
196; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
197; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
198; CHECK-EXTEND-ROUND-NEXT:    and z3.h, z3.h, #0x7fff
199; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
200; CHECK-EXTEND-ROUND-NEXT:    and z2.h, z2.h, #0x8000
201; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
202; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
203; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
204; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.s, z0.s, z2.s
205; CHECK-EXTEND-ROUND-NEXT:    ret
206  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
207  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
208  ret <vscale x 4 x half> %r
209}
210
; Declaration of the copysign intrinsic used by the <vscale x 4 x half> tests.
; NOTE(review): suffix is ".v4f16" although the type is scalable - confirm
; whether ".nxv4f16" would be the canonical spelling.
211declare <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0
212
213;============ v8f16
214
; Same-type copysign on <vscale x 8 x half>. Expected code: isolate the sign
; bit of z1 (0x8000), clear the sign bit of z0 (0x7fff), ORR the results.
; Output is the same for both llc configurations.
215define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
216; CHECK-LABEL: test_copysign_v8f16_v8f16:
217; CHECK:       // %bb.0:
218; CHECK-NEXT:    and z1.h, z1.h, #0x8000
219; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
220; CHECK-NEXT:    orr z0.d, z0.d, z1.d
221; CHECK-NEXT:    ret
222  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
223  ret <vscale x 8 x half> %r
224}
225
; %b is <vscale x 8 x float>, which the assertions show arriving in two
; registers (z1 and z2); it is narrowed with fptrunc before copysign on
; <vscale x 8 x half>. The two llc configurations differ:
;  - default: both halves of %b are converted from .s to .h, packed with uzp1
;    on .h elements, and a single sign-mask / ORR is applied against z0;
;  - with --combiner-vector-fcopysign-extend-round: %a is unpacked into high
;    and low halves, each half is masked and ORed with the matching converted
;    half of %b, and the results are packed with uzp1 at the end.
226define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
227; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
228; CHECK-NO-EXTEND-ROUND:       // %bb.0:
229; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.s
230; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
231; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
232; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
233; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.h, z1.h, z2.h
234; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
235; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
236; CHECK-NO-EXTEND-ROUND-NEXT:    ret
237;
238; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
239; CHECK-EXTEND-ROUND:       // %bb.0:
240; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.s
241; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.s, z0.h
242; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.s, z0.h
243; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
244; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
245; CHECK-EXTEND-ROUND-NEXT:    and z3.h, z3.h, #0x7fff
246; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
247; CHECK-EXTEND-ROUND-NEXT:    and z2.h, z2.h, #0x8000
248; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
249; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
250; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
251; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.h, z0.h, z2.h
252; CHECK-EXTEND-ROUND-NEXT:    ret
253  %tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
254  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
255  ret <vscale x 8 x half> %r
256}
257
258
259;========== FCOPYSIGN_EXTEND_ROUND
260
; Here the rounding happens AFTER the copysign: copysign is computed on
; <vscale x 4 x float> and its result is narrowed with fptrunc to
; <vscale x 4 x half>, which is what the function returns.
; The two llc configurations differ in where the conversion ends up:
;  - default: mask / ORR at .s width first, then one fcvt of the result to .h;
;  - with --combiner-vector-fcopysign-extend-round: both inputs are converted
;    to .h first, and the mask / ORR is done at .h width.
261define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
262; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
263; CHECK-NO-EXTEND-ROUND:       // %bb.0:
264; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
265; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
266; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.s
267; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
268; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z0.h, p0/m, z0.s
269; CHECK-NO-EXTEND-ROUND-NEXT:    ret
270;
271; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
272; CHECK-EXTEND-ROUND:       // %bb.0:
273; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.s
274; CHECK-EXTEND-ROUND-NEXT:    fcvt z0.h, p0/m, z0.s
275; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
276; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
277; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
278; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
279; CHECK-EXTEND-ROUND-NEXT:    ret
280  %t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
281  %t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
282  ret <vscale x 4 x half> %t2
283}
284
; Rounding happens AFTER the copysign: copysign is computed on
; <vscale x 2 x double> and its result is narrowed with fptrunc to
; <vscale x 2 x float>, which is what the function returns.
; The two llc configurations differ in where the conversion ends up:
;  - default: mask / ORR at .d width first, then one fcvt of the result to .s;
;  - with --combiner-vector-fcopysign-extend-round: both inputs are converted
;    to .s first, and the mask / ORR is done at .s width.
285define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
286; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
287; CHECK-NO-EXTEND-ROUND:       // %bb.0:
288; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.d, z1.d, #0x8000000000000000
289; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
290; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
291; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
292; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z0.s, p0/m, z0.d
293; CHECK-NO-EXTEND-ROUND-NEXT:    ret
294;
295; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
296; CHECK-EXTEND-ROUND:       // %bb.0:
297; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
298; CHECK-EXTEND-ROUND-NEXT:    fcvt z0.s, p0/m, z0.d
299; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
300; CHECK-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
301; CHECK-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
302; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
303; CHECK-EXTEND-ROUND-NEXT:    ret
304  %t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
305  %t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
306  ret <vscale x 2 x float> %t2
307}
308
; Declaration of the copysign intrinsic on <vscale x 8 x half>, called by
; test_copysign_v8f16_v8f16 and test_copysign_v8f16_v8f32. Unlike the other
; declarations in this file it does not sit directly after its own section.
; NOTE(review): suffix is ".v8f16" although the type is scalable - confirm
; whether ".nxv8f16" would be the canonical spelling.
309declare <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0
310
; Attribute group #0 is attached to every function definition and intrinsic
; declaration visible in this file; it only sets nounwind.
311attributes #0 = { nounwind }
312