xref: /llvm-project/llvm/test/CodeGen/AArch64/vector-fcopysign.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck --check-prefixes=CHECK,NOFP16 %s
3; RUN: llc < %s -mtriple aarch64-apple-darwin -mattr=+v8.2a,+fullfp16 | FileCheck --check-prefixes=CHECK,FP16 %s
4
; The file is compiled twice for aarch64-apple-darwin: once with default
; features and once with +v8.2a,+fullfp16. Each run is verified against the
; shared prefix plus one configuration-specific prefix.
5target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
6
7;============ v1f32
8
9; WidenVecRes same
; copysign on <1 x float> where the magnitude (%a) and sign (%b) operands
; already share a type, so the intrinsic is applied to the arguments directly.
; Expected code: mvni builds 0x7fffffff in each 32-bit lane, then bif copies
; into v0 the bits of v1 where that constant is zero (the sign bit).
10define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
11; CHECK-LABEL: test_copysign_v1f32_v1f32:
12; CHECK:       ; %bb.0:
13; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
14; CHECK-NEXT:    bif.8b v0, v1, v2
15; CHECK-NEXT:    ret
16  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
17  ret <1 x float> %r
18}
19
20; WidenVecRes mismatched
; The sign operand arrives as <1 x double> and is narrowed with fptrunc before
; the copysign call, so the result keeps the <1 x float> type of %a.
; Expected code: an fcvtn on the 2d view of v1 performs the narrowing and
; feeds the same mvni/bif pair used when both operands are <1 x float>.
21define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
22; CHECK-LABEL: test_copysign_v1f32_v1f64:
23; CHECK:       ; %bb.0:
24; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
25; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
26; CHECK-NEXT:    fcvtn v1.2s, v1.2d
27; CHECK-NEXT:    bif.8b v0, v1, v2
28; CHECK-NEXT:    ret
29  %tmp0 = fptrunc <1 x double> %b to <1 x float>
30  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
31  ret <1 x float> %r
32}
33
34declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0
35
36;============ v1f64
37
38; WidenVecOp #1
; The sign operand arrives as <1 x float> and is widened with fpext before the
; copysign call on <1 x double>.
; Expected code: fcvtl performs the extension; movi (all ones) followed by
; fneg leaves 0x7fffffffffffffff in each 64-bit lane, and bif uses that
; constant on the full 16-byte registers.
39define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
40; CHECK-LABEL: test_copysign_v1f64_v1f32:
41; CHECK:       ; %bb.0:
42; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
43; CHECK-NEXT:    fcvtl v1.2d, v1.2s
44; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
45; CHECK-NEXT:    fneg.2d v2, v2
46; CHECK-NEXT:    bif.16b v0, v1, v2
47; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
48; CHECK-NEXT:    ret
49  %tmp0 = fpext <1 x float> %b to <1 x double>
50  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
51  ret <1 x double> %r
52}
53
; copysign on <1 x double> with matching operand types; no conversion of %b.
; Expected code: movi (all ones) followed by fneg leaves 0x7fffffffffffffff in
; each 64-bit lane, and a single bif on the 16-byte registers combines v0
; and v1.
54define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
55; CHECK-LABEL: test_copysign_v1f64_v1f64:
56; CHECK:       ; %bb.0:
57; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
58; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
59; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
60; CHECK-NEXT:    fneg.2d v2, v2
61; CHECK-NEXT:    bif.16b v0, v1, v2
62; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
63; CHECK-NEXT:    ret
64  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
65  ret <1 x double> %r
66}
67
68declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0
69
70;============ v2f32
71
; copysign on <2 x float> with matching operand types.
; Expected code: mvni builds 0x7fffffff in each 32-bit lane and one bif on the
; 8-byte registers takes the remaining (sign) bit from v1.
72define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
73; CHECK-LABEL: test_copysign_v2f32_v2f32:
74; CHECK:       ; %bb.0:
75; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
76; CHECK-NEXT:    bif.8b v0, v1, v2
77; CHECK-NEXT:    ret
78  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
79  ret <2 x float> %r
80}
81
; The sign operand is <2 x double> and is narrowed with fptrunc to
; <2 x float> before the copysign call.
; Expected code: fcvtn does the narrowing, followed by the mvni/bif pair on
; the 8-byte registers.
82define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
83; CHECK-LABEL: test_copysign_v2f32_v2f64:
84; CHECK:       ; %bb.0:
85; CHECK-NEXT:    fcvtn v1.2s, v1.2d
86; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
87; CHECK-NEXT:    bif.8b v0, v1, v2
88; CHECK-NEXT:    ret
89  %tmp0 = fptrunc <2 x double> %b to <2 x float>
90  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
91  ret <2 x float> %r
92}
93
94declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
95
96;============ v4f32
97
; copysign on <4 x float> with matching operand types.
; Expected code: the 4s form of mvni builds 0x7fffffff per lane and one bif
; on the 16-byte registers takes the sign bit from v1.
98define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
99; CHECK-LABEL: test_copysign_v4f32_v4f32:
100; CHECK:       ; %bb.0:
101; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
102; CHECK-NEXT:    bif.16b v0, v1, v2
103; CHECK-NEXT:    ret
104  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
105  ret <4 x float> %r
106}
107
108; SplitVecOp #1
; The sign operand is <4 x double>, which occupies two vector registers
; (v1 and v2 in the expected output), and is narrowed with fptrunc to
; <4 x float> before the copysign call.
; Expected code: fcvtn fills the low half of v1 and fcvtn2 the high half
; from v2, then a single mvni/bif pair operates on the 16-byte registers.
109define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
110; CHECK-LABEL: test_copysign_v4f32_v4f64:
111; CHECK:       ; %bb.0:
112; CHECK-NEXT:    fcvtn v1.2s, v1.2d
113; CHECK-NEXT:    fcvtn2 v1.4s, v2.2d
114; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
115; CHECK-NEXT:    bif.16b v0, v1, v2
116; CHECK-NEXT:    ret
117  %tmp0 = fptrunc <4 x double> %b to <4 x float>
118  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
119  ret <4 x float> %r
120}
121
122declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
123
124;============ v2f64
125
; The sign operand is <2 x float> and is widened with fpext to <2 x double>
; before the copysign call.
; Expected code: fcvtl does the widening; movi (all ones) followed by fneg
; leaves 0x7fffffffffffffff per 64-bit lane for the bif.
; NOTE(review): the "v232" suffix looks like a typo for "v2f32" when compared
; with the sibling test names. The label check below uses the same spelling,
; so both would have to be renamed together -- confirm before changing.
126define <2 x double> @test_copysign_v2f64_v232(<2 x double> %a, <2 x float> %b) #0 {
127; CHECK-LABEL: test_copysign_v2f64_v232:
128; CHECK:       ; %bb.0:
129; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
130; CHECK-NEXT:    fcvtl v1.2d, v1.2s
131; CHECK-NEXT:    fneg.2d v2, v2
132; CHECK-NEXT:    bif.16b v0, v1, v2
133; CHECK-NEXT:    ret
134  %tmp0 = fpext <2 x float> %b to <2 x double>
135  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
136  ret <2 x double> %r
137}
138
; copysign on <2 x double> with matching operand types.
; Expected code: movi (all ones) followed by fneg leaves 0x7fffffffffffffff
; per 64-bit lane, then one bif on the 16-byte registers.
139define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
140; CHECK-LABEL: test_copysign_v2f64_v2f64:
141; CHECK:       ; %bb.0:
142; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
143; CHECK-NEXT:    fneg.2d v2, v2
144; CHECK-NEXT:    bif.16b v0, v1, v2
145; CHECK-NEXT:    ret
146  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
147  ret <2 x double> %r
148}
149
150declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
151
152;============ v4f64
153
154; SplitVecRes mismatched
; The result type <4 x double> spans two vector registers (v0 and v1 in the
; expected output); the <4 x float> sign operand is widened with fpext first.
; Expected code: fcvtl widens the low half of v2 into v4 and fcvtl2 the high
; half in place, then two bif instructions share the single constant built
; in v3 by movi + fneg.
155define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
156; CHECK-LABEL: test_copysign_v4f64_v4f32:
157; CHECK:       ; %bb.0:
158; CHECK-NEXT:    movi.2d v3, #0xffffffffffffffff
159; CHECK-NEXT:    fcvtl v4.2d, v2.2s
160; CHECK-NEXT:    fcvtl2 v2.2d, v2.4s
161; CHECK-NEXT:    fneg.2d v3, v3
162; CHECK-NEXT:    bif.16b v1, v2, v3
163; CHECK-NEXT:    bif.16b v0, v4, v3
164; CHECK-NEXT:    ret
165  %tmp0 = fpext <4 x float> %b to <4 x double>
166  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
167  ret <4 x double> %r
168}
169
170; SplitVecRes same
; copysign on <4 x double> with matching operand types; each operand spans
; two vector registers in the expected output (v0/v1 and v2/v3).
; Expected code: one constant is built in v4 by movi + fneg and reused by
; two bif instructions, one per register pair.
171define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
172; CHECK-LABEL: test_copysign_v4f64_v4f64:
173; CHECK:       ; %bb.0:
174; CHECK-NEXT:    movi.2d v4, #0xffffffffffffffff
175; CHECK-NEXT:    fneg.2d v4, v4
176; CHECK-NEXT:    bif.16b v0, v2, v4
177; CHECK-NEXT:    bif.16b v1, v3, v4
178; CHECK-NEXT:    ret
179  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
180  ret <4 x double> %r
181}
182
183declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
184
185;============ v4f16
186
; copysign on <4 x half> with matching operand types.
; Expected code: the 4h form of mvni builds 0x7fff in each 16-bit lane and
; one bif on the 8-byte registers takes the sign bit from v1. The same
; sequence is expected from both llc invocations in this file.
187define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 {
188; CHECK-LABEL: test_copysign_v4f16_v4f16:
189; CHECK:       ; %bb.0:
190; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
191; CHECK-NEXT:    bif.8b v0, v1, v2
192; CHECK-NEXT:    ret
193  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
194  ret <4 x half> %r
195}
196
; The sign operand is <4 x float> and is narrowed with fptrunc to <4 x half>
; before the copysign call.
; Expected code: a single fcvtn (4s to 4h) does the narrowing, followed by
; the 16-bit-lane mvni/bif pair.
197define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 {
198; CHECK-LABEL: test_copysign_v4f16_v4f32:
199; CHECK:       ; %bb.0:
200; CHECK-NEXT:    fcvtn v1.4h, v1.4s
201; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
202; CHECK-NEXT:    bif.8b v0, v1, v2
203; CHECK-NEXT:    ret
204  %tmp0 = fptrunc <4 x float> %b to <4 x half>
205  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
206  ret <4 x half> %r
207}
208
; The sign operand is <4 x double> (two vector registers, v1 and v2) and is
; narrowed with fptrunc all the way to <4 x half> before the copysign call.
; Expected code: the narrowing is done in two steps -- fcvtxn/fcvtxn2 from
; double to single precision, then fcvtn from single to half -- before the
; 16-bit-lane mvni/bif pair.
209define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0 {
210; CHECK-LABEL: test_copysign_v4f16_v4f64:
211; CHECK:       ; %bb.0:
212; CHECK-NEXT:    fcvtxn v1.2s, v1.2d
213; CHECK-NEXT:    fcvtxn2 v1.4s, v2.2d
214; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
215; CHECK-NEXT:    fcvtn v1.4h, v1.4s
216; CHECK-NEXT:    bif.8b v0, v1, v2
217; CHECK-NEXT:    ret
218  %tmp0 = fptrunc <4 x double> %b to <4 x half>
219  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
220  ret <4 x half> %r
221}
222
223declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
224
225;============ v8f16
226
; copysign on <8 x half> with matching operand types.
; Expected code: the 8h form of mvni builds 0x7fff in each 16-bit lane and
; one bif on the 16-byte registers takes the sign bit from v1.
227define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 {
228; CHECK-LABEL: test_copysign_v8f16_v8f16:
229; CHECK:       ; %bb.0:
230; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
231; CHECK-NEXT:    bif.16b v0, v1, v2
232; CHECK-NEXT:    ret
233  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
234  ret <8 x half> %r
235}
236
; The sign operand is <8 x float> (two vector registers, v1 and v2) and is
; narrowed with fptrunc to <8 x half> before the copysign call.
; Expected code: fcvtn fills the low half of v1 and fcvtn2 the high half
; from v2, then a single mvni/bif pair operates on the 16-byte registers.
237define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
238; CHECK-LABEL: test_copysign_v8f16_v8f32:
239; CHECK:       ; %bb.0:
240; CHECK-NEXT:    fcvtn v1.4h, v1.4s
241; CHECK-NEXT:    fcvtn2 v1.8h, v2.4s
242; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
243; CHECK-NEXT:    bif.16b v0, v1, v2
244; CHECK-NEXT:    ret
245  %tmp0 = fptrunc <8 x float> %b to <8 x half>
246  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
247  ret <8 x half> %r
248}
249
250declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
251
252;============ v4bf16
253
; copysign on <4 x bfloat> with matching operand types.
; Expected code: identical in shape to the <4 x half> case -- mvni builds
; 0x7fff in each 16-bit lane and one bif on the 8-byte registers takes the
; sign bit from v1.
254define <4 x bfloat> @test_copysign_v4bf16_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
255; CHECK-LABEL: test_copysign_v4bf16_v4bf16:
256; CHECK:       ; %bb.0:
257; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
258; CHECK-NEXT:    bif.8b v0, v1, v2
259; CHECK-NEXT:    ret
260  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
261  ret <4 x bfloat> %r
262}
263
; The sign operand is <4 x float> and is narrowed with fptrunc to
; <4 x bfloat> before the copysign call.
; Expected code: the float-to-bfloat narrowing appears as a vector integer
; sequence (ushr, and, add, fcmeq, orr, bit, shrn) rather than a single
; convert instruction; the 16-bit-lane mvni/bif pair then performs the
; copysign itself.
264define <4 x bfloat> @test_copysign_v4bf16_v4f32(<4 x bfloat> %a, <4 x float> %b) #0 {
265; CHECK-LABEL: test_copysign_v4bf16_v4f32:
266; CHECK:       ; %bb.0:
267; CHECK-NEXT:    movi.4s v2, #1
268; CHECK-NEXT:    movi.4s v3, #127, msl #8
269; CHECK-NEXT:    ushr.4s v4, v1, #16
270; CHECK-NEXT:    and.16b v2, v4, v2
271; CHECK-NEXT:    add.4s v3, v1, v3
272; CHECK-NEXT:    fcmeq.4s v4, v1, v1
273; CHECK-NEXT:    orr.4s v1, #64, lsl #16
274; CHECK-NEXT:    add.4s v2, v2, v3
275; CHECK-NEXT:    bit.16b v1, v2, v4
276; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
277; CHECK-NEXT:    shrn.4h v1, v1, #16
278; CHECK-NEXT:    bif.8b v0, v1, v2
279; CHECK-NEXT:    ret
280  %tmp0 = fptrunc <4 x float> %b to <4 x bfloat>
281  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %tmp0)
282  ret <4 x bfloat> %r
283}
284
; The sign operand is <4 x double> (two vector registers, v1 and v2) and is
; narrowed with fptrunc to <4 x bfloat> before the copysign call.
; Expected code: fcvtxn/fcvtxn2 first narrow double to single precision; the
; remaining float-to-bfloat step uses the same integer sequence (ushr, and,
; add, fcmeq, orr, bit, shrn) as the <4 x float> source case, followed by the
; 16-bit-lane mvni/bif pair.
285define <4 x bfloat> @test_copysign_v4bf16_v4f64(<4 x bfloat> %a, <4 x double> %b) #0 {
286; CHECK-LABEL: test_copysign_v4bf16_v4f64:
287; CHECK:       ; %bb.0:
288; CHECK-NEXT:    fcvtxn v1.2s, v1.2d
289; CHECK-NEXT:    movi.4s v3, #127, msl #8
290; CHECK-NEXT:    fcvtxn2 v1.4s, v2.2d
291; CHECK-NEXT:    movi.4s v2, #1
292; CHECK-NEXT:    ushr.4s v4, v1, #16
293; CHECK-NEXT:    add.4s v3, v1, v3
294; CHECK-NEXT:    and.16b v2, v4, v2
295; CHECK-NEXT:    fcmeq.4s v4, v1, v1
296; CHECK-NEXT:    orr.4s v1, #64, lsl #16
297; CHECK-NEXT:    add.4s v2, v2, v3
298; CHECK-NEXT:    bit.16b v1, v2, v4
299; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
300; CHECK-NEXT:    shrn.4h v1, v1, #16
301; CHECK-NEXT:    bif.8b v0, v1, v2
302; CHECK-NEXT:    ret
303  %tmp0 = fptrunc <4 x double> %b to <4 x bfloat>
304  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %tmp0)
305  ret <4 x bfloat> %r
306}
307
308declare <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) #0
309
310;============ v8bf16
311
; copysign on <8 x bfloat> with matching operand types.
; Expected code: the 8h form of mvni builds 0x7fff in each 16-bit lane and
; one bif on the 16-byte registers takes the sign bit from v1.
312define <8 x bfloat> @test_copysign_v8bf16_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
313; CHECK-LABEL: test_copysign_v8bf16_v8bf16:
314; CHECK:       ; %bb.0:
315; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
316; CHECK-NEXT:    bif.16b v0, v1, v2
317; CHECK-NEXT:    ret
318  %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
319  ret <8 x bfloat> %r
320}
321
; The sign operand is <8 x float> (two vector registers, v1 and v2) and is
; narrowed with fptrunc to <8 x bfloat> before the copysign call.
; Expected code: the integer float-to-bfloat sequence (ushr, and, add, fcmeq,
; orr, bit) is applied to v1 and v2 independently with the two constants in
; v3/v4 shared between them; uzp2 then gathers the upper 16 bits of every
; 32-bit lane from both registers into one 8h vector for the final mvni/bif
; pair.
322define <8 x bfloat> @test_copysign_v8bf16_v8f32(<8 x bfloat> %a, <8 x float> %b) #0 {
323; CHECK-LABEL: test_copysign_v8bf16_v8f32:
324; CHECK:       ; %bb.0:
325; CHECK-NEXT:    movi.4s v3, #1
326; CHECK-NEXT:    movi.4s v4, #127, msl #8
327; CHECK-NEXT:    ushr.4s v5, v2, #16
328; CHECK-NEXT:    ushr.4s v6, v1, #16
329; CHECK-NEXT:    and.16b v5, v5, v3
330; CHECK-NEXT:    add.4s v7, v2, v4
331; CHECK-NEXT:    and.16b v3, v6, v3
332; CHECK-NEXT:    add.4s v4, v1, v4
333; CHECK-NEXT:    fcmeq.4s v6, v2, v2
334; CHECK-NEXT:    orr.4s v2, #64, lsl #16
335; CHECK-NEXT:    add.4s v5, v5, v7
336; CHECK-NEXT:    fcmeq.4s v7, v1, v1
337; CHECK-NEXT:    orr.4s v1, #64, lsl #16
338; CHECK-NEXT:    add.4s v3, v3, v4
339; CHECK-NEXT:    bit.16b v2, v5, v6
340; CHECK-NEXT:    bit.16b v1, v3, v7
341; CHECK-NEXT:    uzp2.8h v1, v1, v2
342; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
343; CHECK-NEXT:    bif.16b v0, v1, v2
344; CHECK-NEXT:    ret
345  %tmp0 = fptrunc <8 x float> %b to <8 x bfloat>
346  %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %tmp0)
347  ret <8 x bfloat> %r
348}
349
350declare <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) #0
351
; Attribute group referenced as #0 by the test functions and intrinsic
; declarations above; it carries only nounwind.
352attributes #0 = { nounwind }
353;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
354; FP16: {{.*}}
355; NOFP16: {{.*}}
356