; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck --check-prefixes=CHECK,CHECK_NO_EXTEND_ROUND %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK_EXTEND_ROUND %s
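;
; The two RUN lines differ only in --combiner-vector-fcopysign-extend-round,
; a DAGCombiner option that, when enabled, allows a redundant fpext/fptrunc
; feeding the sign operand of a vector FCOPYSIGN to be merged into the
; FCOPYSIGN itself; only the sign bit of that operand is consumed, so the
; conversion can be dropped and the node carries mismatched operand types.
; The IR shape being toggled looks like this (an illustrative sketch, not one
; of the checked tests):
;
;   %narrow = fptrunc <vscale x 4 x double> %y to <vscale x 4 x float>
;   %r = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %narrow)
;
; Where the two modes legalize differently, both outputs are checked below.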

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

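; copysign(a, b) produces the magnitude of a with the sign of b. On SVE2 this
; lowers to a single BSL bit-select against the constant mask that covers
; every bit except the sign bit: bsl zd, zd, zm, zk computes
; (zd & zk) | (zm & ~zk). An integer-level IR equivalent of the nxv2f32 case
; (an illustrative sketch using the splat constant syntax; it is not part of
; the checked tests):
;
;   %ia   = bitcast <vscale x 2 x float> %a to <vscale x 2 x i32>
;   %ib   = bitcast <vscale x 2 x float> %b to <vscale x 2 x i32>
;   %mag  = and <vscale x 2 x i32> %ia, splat (i32 2147483647)  ; 0x7fffffff
;   %sgn  = and <vscale x 2 x i32> %ib, splat (i32 -2147483648) ; 0x80000000
;   %bits = or <vscale x 2 x i32> %mag, %sgn
;   %r    = bitcast <vscale x 2 x i32> %bits to <vscale x 2 x float>
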
;============ v2f32

define <vscale x 2 x float> @test_copysign_v2f32_v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %r
}

define <vscale x 2 x float> @test_copysign_v2f32_v2f64(<vscale x 2 x float> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
  %r = call <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
  ret <vscale x 2 x float> %r
}

declare <vscale x 2 x float> @llvm.copysign.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0

;============ v4f32

define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %r
}

; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.s, #0x7fffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    mov z4.s, #0x7fffffff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.s, z0.s, z3.s
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
  %r = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
  ret <vscale x 4 x float> %r
}
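;
; Two strategies are visible above. Without the extend-round combine, each
; double-width half of %b is rounded to single precision and the halves are
; repacked before a single bit-select:
;
;   fcvt z1.s, p0/m, z1.d   ; valid results land in the even .s lanes
;   uzp1 z1.s, z1.s, z2.s   ; take the even elements of both halves, i.e.
;                           ; concatenate the two rounded vectors
;
; With the combine, %a is unpacked to two double-width halves instead, the
; bsl is applied per half, and uzp1 repacks the result.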

declare <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0

;============ v2f64

define <vscale x 2 x double> @test_copysign_v2f64_v2f32(<vscale x 2 x double> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
  %r = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %tmp0)
  ret <vscale x 2 x double> %r
}

define <vscale x 2 x double> @test_copysign_v2f64_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %r
}

declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0

;============ v4f64

; SplitVecRes mismatched
define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    uunpklo z2.d, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z2.s
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z2.d, z2.s
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
  %r = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
  ret <vscale x 4 x double> %r
}
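;
; The nxv4f64 result needs two Z registers, so the copysign is split by the
; legalizer: the nxv4f32 sign operand is widened with uunpklo/uunpkhi, each
; half is extended with fcvt, and one bsl is emitted per result register. The
; two modes emit the same instructions here, differing only in the order of
; the two fcvts.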

; SplitVecRes same
define <vscale x 4 x double> @test_copysign_v4f64_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %r
}
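;
; With matching types no conversion is required; splitting only means the
; shared mask in z4 feeds one bsl per result register (z0 and z1).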

declare <vscale x 4 x double> @llvm.copysign.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0

;============ v4f16

define <vscale x 4 x half> @test_copysign_v4f16_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %r
}

define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.h, #32767 // =0x7fff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    mov z4.h, #32767 // =0x7fff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.s, z0.s, z3.s
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

declare <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0

;============ v8f16

define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %r
}

define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.s
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.h, #32767 // =0x7fff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.h, z1.h, z2.h
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.s
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.s, z0.h
; CHECK_EXTEND_ROUND-NEXT:    mov z4.h, #32767 // =0x7fff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.s, z0.h
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.h, z0.h, z3.h
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
  %r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
  ret <vscale x 8 x half> %r
}

declare <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0

attributes #0 = { nounwind }