xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE
3; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE2
4; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=SVE2
5; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
6
; NOTE(review): "m:o" in the data layout string selects Mach-O name mangling
; in the LangRef data layout syntax, while the triple below names an ELF Linux
; target. Presumably harmless for these checks - confirm whether "m:e" was
; intended.
7target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
8
; The triple is fixed here, which is why none of the RUN lines pass -mtriple.
9target triple = "aarch64-unknown-linux-gnu"
10
11;============ f16
12
; copysign on <4 x half> (one 64-bit vector): the value loaded from %ap supplies
; the magnitude, the value loaded from %bp supplies the sign, and the result is
; stored back through %ap.
; Expected code per RUN configuration:
;  * +sve: mask the sign bit (#0x8000) out of the %bp value and the remaining
;    bits (#0x7fff) out of the %ap value, then ORR the two together.
;  * +sve2 / +sme: a single BSL driven by a 0x7fff splat.
;  * no vector features: both operands are spilled to the stack and every lane
;    is rebuilt with fcvt/fabs/fneg/fcsel, the select being keyed on bit 7 of
;    the high byte of the matching %bp lane.
13define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {
14; SVE-LABEL: test_copysign_v4f16_v4f16:
15; SVE:       // %bb.0:
16; SVE-NEXT:    ldr d0, [x0]
17; SVE-NEXT:    ldr d1, [x1]
18; SVE-NEXT:    and z1.h, z1.h, #0x8000
19; SVE-NEXT:    and z0.h, z0.h, #0x7fff
20; SVE-NEXT:    orr z0.d, z0.d, z1.d
21; SVE-NEXT:    str d0, [x0]
22; SVE-NEXT:    ret
23;
24; SVE2-LABEL: test_copysign_v4f16_v4f16:
25; SVE2:       // %bb.0:
26; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
27; SVE2-NEXT:    ldr d1, [x0]
28; SVE2-NEXT:    ldr d2, [x1]
29; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
30; SVE2-NEXT:    str d1, [x0]
31; SVE2-NEXT:    ret
32;
33; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16:
34; NONEON-NOSVE:       // %bb.0:
35; NONEON-NOSVE-NEXT:    sub sp, sp, #48
36; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
37; NONEON-NOSVE-NEXT:    ldr d0, [x1]
38; NONEON-NOSVE-NEXT:    ldr d1, [x0]
39; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #24]
40; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
41; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
42; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
43; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
44; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
45; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
46; NONEON-NOSVE-NEXT:    tst w8, #0x80
47; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
48; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
49; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
50; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
51; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
52; NONEON-NOSVE-NEXT:    fcvt s0, h0
53; NONEON-NOSVE-NEXT:    fabs s0, s0
54; NONEON-NOSVE-NEXT:    fneg s1, s0
55; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
56; NONEON-NOSVE-NEXT:    tst w8, #0x80
57; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
58; NONEON-NOSVE-NEXT:    fcvt h0, s0
59; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
60; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
61; NONEON-NOSVE-NEXT:    fcvt s0, h0
62; NONEON-NOSVE-NEXT:    fabs s0, s0
63; NONEON-NOSVE-NEXT:    fneg s1, s0
64; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
65; NONEON-NOSVE-NEXT:    tst w8, #0x80
66; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
67; NONEON-NOSVE-NEXT:    fcvt h0, s0
68; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
69; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
70; NONEON-NOSVE-NEXT:    fcvt s0, h0
71; NONEON-NOSVE-NEXT:    fabs s0, s0
72; NONEON-NOSVE-NEXT:    fneg s1, s0
73; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
74; NONEON-NOSVE-NEXT:    tst w8, #0x80
75; NONEON-NOSVE-NEXT:    fcvt h0, s0
76; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
77; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
78; NONEON-NOSVE-NEXT:    fcvt s0, h0
79; NONEON-NOSVE-NEXT:    fabs s0, s0
80; NONEON-NOSVE-NEXT:    fneg s1, s0
81; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
82; NONEON-NOSVE-NEXT:    fcvt h0, s0
83; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
84; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
85; NONEON-NOSVE-NEXT:    str d0, [x0]
86; NONEON-NOSVE-NEXT:    add sp, sp, #48
87; NONEON-NOSVE-NEXT:    ret
88  %a = load <4 x half>, ptr %ap
89  %b = load <4 x half>, ptr %bp
90  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
91  store <4 x half> %r, ptr %ap
92  ret void
93}
94
; copysign on <8 x half> (one 128-bit vector): magnitude comes from the value
; at %ap, sign from the value at %bp, result is written back through %ap.
; Expected code per RUN configuration:
;  * +sve: AND with #0x8000 on the %bp value, AND with #0x7fff on the %ap
;    value, then ORR.
;  * +sve2 / +sme: one BSL using a 0x7fff splat as the select mask.
;  * no vector features: an 80-byte stack frame holds both operands; all eight
;    lanes are processed one at a time with fcvt/fabs/fneg/fcsel, each select
;    keyed on bit 7 of the high byte of the corresponding %bp lane.
95define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) {
96; SVE-LABEL: test_copysign_v8f16_v8f16:
97; SVE:       // %bb.0:
98; SVE-NEXT:    ldr q0, [x0]
99; SVE-NEXT:    ldr q1, [x1]
100; SVE-NEXT:    and z1.h, z1.h, #0x8000
101; SVE-NEXT:    and z0.h, z0.h, #0x7fff
102; SVE-NEXT:    orr z0.d, z0.d, z1.d
103; SVE-NEXT:    str q0, [x0]
104; SVE-NEXT:    ret
105;
106; SVE2-LABEL: test_copysign_v8f16_v8f16:
107; SVE2:       // %bb.0:
108; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
109; SVE2-NEXT:    ldr q1, [x0]
110; SVE2-NEXT:    ldr q2, [x1]
111; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
112; SVE2-NEXT:    str q1, [x0]
113; SVE2-NEXT:    ret
114;
115; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16:
116; NONEON-NOSVE:       // %bb.0:
117; NONEON-NOSVE-NEXT:    sub sp, sp, #80
118; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
119; NONEON-NOSVE-NEXT:    ldr q0, [x1]
120; NONEON-NOSVE-NEXT:    ldr q1, [x0]
121; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
122; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
123; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
124; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
125; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
126; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
127; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
128; NONEON-NOSVE-NEXT:    tst w8, #0x80
129; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
130; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
131; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
132; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
133; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
134; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
135; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
136; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
137; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
138; NONEON-NOSVE-NEXT:    str h0, [sp, #4]
139; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
140; NONEON-NOSVE-NEXT:    str h0, [sp]
141; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
142; NONEON-NOSVE-NEXT:    fcvt s0, h0
143; NONEON-NOSVE-NEXT:    fabs s0, s0
144; NONEON-NOSVE-NEXT:    fneg s1, s0
145; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
146; NONEON-NOSVE-NEXT:    tst w8, #0x80
147; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
148; NONEON-NOSVE-NEXT:    fcvt h0, s0
149; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
150; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
151; NONEON-NOSVE-NEXT:    fcvt s0, h0
152; NONEON-NOSVE-NEXT:    fabs s0, s0
153; NONEON-NOSVE-NEXT:    fneg s1, s0
154; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
155; NONEON-NOSVE-NEXT:    tst w8, #0x80
156; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
157; NONEON-NOSVE-NEXT:    fcvt h0, s0
158; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
159; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
160; NONEON-NOSVE-NEXT:    fcvt s0, h0
161; NONEON-NOSVE-NEXT:    fabs s0, s0
162; NONEON-NOSVE-NEXT:    fneg s1, s0
163; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
164; NONEON-NOSVE-NEXT:    tst w8, #0x80
165; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
166; NONEON-NOSVE-NEXT:    fcvt h0, s0
167; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
168; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
169; NONEON-NOSVE-NEXT:    fcvt s0, h0
170; NONEON-NOSVE-NEXT:    fabs s0, s0
171; NONEON-NOSVE-NEXT:    fneg s1, s0
172; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
173; NONEON-NOSVE-NEXT:    tst w8, #0x80
174; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
175; NONEON-NOSVE-NEXT:    fcvt h0, s0
176; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
177; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
178; NONEON-NOSVE-NEXT:    fcvt s0, h0
179; NONEON-NOSVE-NEXT:    fabs s0, s0
180; NONEON-NOSVE-NEXT:    fneg s1, s0
181; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
182; NONEON-NOSVE-NEXT:    tst w8, #0x80
183; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
184; NONEON-NOSVE-NEXT:    fcvt h0, s0
185; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
186; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
187; NONEON-NOSVE-NEXT:    fcvt s0, h0
188; NONEON-NOSVE-NEXT:    fabs s0, s0
189; NONEON-NOSVE-NEXT:    fneg s1, s0
190; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
191; NONEON-NOSVE-NEXT:    tst w8, #0x80
192; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
193; NONEON-NOSVE-NEXT:    fcvt h0, s0
194; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
195; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
196; NONEON-NOSVE-NEXT:    fcvt s0, h0
197; NONEON-NOSVE-NEXT:    fabs s0, s0
198; NONEON-NOSVE-NEXT:    fneg s1, s0
199; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
200; NONEON-NOSVE-NEXT:    tst w8, #0x80
201; NONEON-NOSVE-NEXT:    fcvt h0, s0
202; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
203; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
204; NONEON-NOSVE-NEXT:    fcvt s0, h0
205; NONEON-NOSVE-NEXT:    fabs s0, s0
206; NONEON-NOSVE-NEXT:    fneg s1, s0
207; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
208; NONEON-NOSVE-NEXT:    fcvt h0, s0
209; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
210; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
211; NONEON-NOSVE-NEXT:    str q0, [x0]
212; NONEON-NOSVE-NEXT:    add sp, sp, #80
213; NONEON-NOSVE-NEXT:    ret
214  %a = load <8 x half>, ptr %ap
215  %b = load <8 x half>, ptr %bp
216  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
217  store <8 x half> %r, ptr %ap
218  ret void
219}
220
; copysign on <16 x half> (256 bits, i.e. two 128-bit registers): magnitude
; from the value at %ap, sign from the value at %bp, result stored to %ap.
; Expected code per RUN configuration:
;  * +sve: both operands are loaded as q-register pairs (ldp) and each half
;    gets its own AND #0x8000 / AND #0x7fff / ORR sequence.
;  * +sve2 / +sme: two BSL instructions sharing a single 0x7fff splat mask.
;  * no vector features: a 160-byte stack frame; the sixteen lanes are handled
;    individually with fcvt/fabs/fneg/fcsel, each select keyed on bit 7 of the
;    high byte of the matching %bp lane.
221define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
222; SVE-LABEL: test_copysign_v16f16_v16f16:
223; SVE:       // %bb.0:
224; SVE-NEXT:    ldp q0, q3, [x1]
225; SVE-NEXT:    ldp q1, q2, [x0]
226; SVE-NEXT:    and z0.h, z0.h, #0x8000
227; SVE-NEXT:    and z3.h, z3.h, #0x8000
228; SVE-NEXT:    and z1.h, z1.h, #0x7fff
229; SVE-NEXT:    and z2.h, z2.h, #0x7fff
230; SVE-NEXT:    orr z0.d, z1.d, z0.d
231; SVE-NEXT:    orr z1.d, z2.d, z3.d
232; SVE-NEXT:    stp q0, q1, [x0]
233; SVE-NEXT:    ret
234;
235; SVE2-LABEL: test_copysign_v16f16_v16f16:
236; SVE2:       // %bb.0:
237; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
238; SVE2-NEXT:    ldp q1, q4, [x1]
239; SVE2-NEXT:    ldp q2, q3, [x0]
240; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
241; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
242; SVE2-NEXT:    stp q2, q3, [x0]
243; SVE2-NEXT:    ret
244;
245; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16:
246; NONEON-NOSVE:       // %bb.0:
247; NONEON-NOSVE-NEXT:    sub sp, sp, #160
248; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
249; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
250; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
251; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #64]
252; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #96]
253; NONEON-NOSVE-NEXT:    ldr h0, [sp, #126]
254; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
255; NONEON-NOSVE-NEXT:    ldr h0, [sp, #124]
256; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
257; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
258; NONEON-NOSVE-NEXT:    ldr h0, [sp, #122]
259; NONEON-NOSVE-NEXT:    tst w8, #0x80
260; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
261; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
262; NONEON-NOSVE-NEXT:    ldr h0, [sp, #120]
263; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
264; NONEON-NOSVE-NEXT:    ldr h0, [sp, #118]
265; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
266; NONEON-NOSVE-NEXT:    ldr h0, [sp, #116]
267; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
268; NONEON-NOSVE-NEXT:    ldr h0, [sp, #114]
269; NONEON-NOSVE-NEXT:    str h0, [sp, #4]
270; NONEON-NOSVE-NEXT:    ldr h0, [sp, #112]
271; NONEON-NOSVE-NEXT:    str h0, [sp]
272; NONEON-NOSVE-NEXT:    ldr h0, [sp, #94]
273; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
274; NONEON-NOSVE-NEXT:    ldr h0, [sp, #92]
275; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
276; NONEON-NOSVE-NEXT:    ldr h0, [sp, #90]
277; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
278; NONEON-NOSVE-NEXT:    ldr h0, [sp, #88]
279; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
280; NONEON-NOSVE-NEXT:    ldr h0, [sp, #86]
281; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
282; NONEON-NOSVE-NEXT:    ldr h0, [sp, #84]
283; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
284; NONEON-NOSVE-NEXT:    ldr h0, [sp, #82]
285; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
286; NONEON-NOSVE-NEXT:    ldr h0, [sp, #80]
287; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
288; NONEON-NOSVE-NEXT:    ldr h0, [sp, #110]
289; NONEON-NOSVE-NEXT:    fcvt s0, h0
290; NONEON-NOSVE-NEXT:    fabs s0, s0
291; NONEON-NOSVE-NEXT:    fneg s1, s0
292; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
293; NONEON-NOSVE-NEXT:    tst w8, #0x80
294; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
295; NONEON-NOSVE-NEXT:    fcvt h0, s0
296; NONEON-NOSVE-NEXT:    str h0, [sp, #158]
297; NONEON-NOSVE-NEXT:    ldr h0, [sp, #108]
298; NONEON-NOSVE-NEXT:    fcvt s0, h0
299; NONEON-NOSVE-NEXT:    fabs s0, s0
300; NONEON-NOSVE-NEXT:    fneg s1, s0
301; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
302; NONEON-NOSVE-NEXT:    tst w8, #0x80
303; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
304; NONEON-NOSVE-NEXT:    fcvt h0, s0
305; NONEON-NOSVE-NEXT:    str h0, [sp, #156]
306; NONEON-NOSVE-NEXT:    ldr h0, [sp, #106]
307; NONEON-NOSVE-NEXT:    fcvt s0, h0
308; NONEON-NOSVE-NEXT:    fabs s0, s0
309; NONEON-NOSVE-NEXT:    fneg s1, s0
310; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
311; NONEON-NOSVE-NEXT:    tst w8, #0x80
312; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
313; NONEON-NOSVE-NEXT:    fcvt h0, s0
314; NONEON-NOSVE-NEXT:    str h0, [sp, #154]
315; NONEON-NOSVE-NEXT:    ldr h0, [sp, #104]
316; NONEON-NOSVE-NEXT:    fcvt s0, h0
317; NONEON-NOSVE-NEXT:    fabs s0, s0
318; NONEON-NOSVE-NEXT:    fneg s1, s0
319; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
320; NONEON-NOSVE-NEXT:    tst w8, #0x80
321; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
322; NONEON-NOSVE-NEXT:    fcvt h0, s0
323; NONEON-NOSVE-NEXT:    str h0, [sp, #152]
324; NONEON-NOSVE-NEXT:    ldr h0, [sp, #102]
325; NONEON-NOSVE-NEXT:    fcvt s0, h0
326; NONEON-NOSVE-NEXT:    fabs s0, s0
327; NONEON-NOSVE-NEXT:    fneg s1, s0
328; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
329; NONEON-NOSVE-NEXT:    tst w8, #0x80
330; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
331; NONEON-NOSVE-NEXT:    fcvt h0, s0
332; NONEON-NOSVE-NEXT:    str h0, [sp, #150]
333; NONEON-NOSVE-NEXT:    ldr h0, [sp, #100]
334; NONEON-NOSVE-NEXT:    fcvt s0, h0
335; NONEON-NOSVE-NEXT:    fabs s0, s0
336; NONEON-NOSVE-NEXT:    fneg s1, s0
337; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
338; NONEON-NOSVE-NEXT:    tst w8, #0x80
339; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
340; NONEON-NOSVE-NEXT:    fcvt h0, s0
341; NONEON-NOSVE-NEXT:    str h0, [sp, #148]
342; NONEON-NOSVE-NEXT:    ldr h0, [sp, #98]
343; NONEON-NOSVE-NEXT:    fcvt s0, h0
344; NONEON-NOSVE-NEXT:    fabs s0, s0
345; NONEON-NOSVE-NEXT:    fneg s1, s0
346; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
347; NONEON-NOSVE-NEXT:    tst w8, #0x80
348; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
349; NONEON-NOSVE-NEXT:    fcvt h0, s0
350; NONEON-NOSVE-NEXT:    str h0, [sp, #146]
351; NONEON-NOSVE-NEXT:    ldr h0, [sp, #96]
352; NONEON-NOSVE-NEXT:    fcvt s0, h0
353; NONEON-NOSVE-NEXT:    fabs s0, s0
354; NONEON-NOSVE-NEXT:    fneg s1, s0
355; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
356; NONEON-NOSVE-NEXT:    tst w8, #0x80
357; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
358; NONEON-NOSVE-NEXT:    fcvt h0, s0
359; NONEON-NOSVE-NEXT:    str h0, [sp, #144]
360; NONEON-NOSVE-NEXT:    ldr h0, [sp, #78]
361; NONEON-NOSVE-NEXT:    fcvt s0, h0
362; NONEON-NOSVE-NEXT:    fabs s0, s0
363; NONEON-NOSVE-NEXT:    fneg s1, s0
364; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
365; NONEON-NOSVE-NEXT:    tst w8, #0x80
366; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
367; NONEON-NOSVE-NEXT:    fcvt h0, s0
368; NONEON-NOSVE-NEXT:    str h0, [sp, #142]
369; NONEON-NOSVE-NEXT:    ldr h0, [sp, #76]
370; NONEON-NOSVE-NEXT:    fcvt s0, h0
371; NONEON-NOSVE-NEXT:    fabs s0, s0
372; NONEON-NOSVE-NEXT:    fneg s1, s0
373; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
374; NONEON-NOSVE-NEXT:    tst w8, #0x80
375; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
376; NONEON-NOSVE-NEXT:    fcvt h0, s0
377; NONEON-NOSVE-NEXT:    str h0, [sp, #140]
378; NONEON-NOSVE-NEXT:    ldr h0, [sp, #74]
379; NONEON-NOSVE-NEXT:    fcvt s0, h0
380; NONEON-NOSVE-NEXT:    fabs s0, s0
381; NONEON-NOSVE-NEXT:    fneg s1, s0
382; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
383; NONEON-NOSVE-NEXT:    tst w8, #0x80
384; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
385; NONEON-NOSVE-NEXT:    fcvt h0, s0
386; NONEON-NOSVE-NEXT:    str h0, [sp, #138]
387; NONEON-NOSVE-NEXT:    ldr h0, [sp, #72]
388; NONEON-NOSVE-NEXT:    fcvt s0, h0
389; NONEON-NOSVE-NEXT:    fabs s0, s0
390; NONEON-NOSVE-NEXT:    fneg s1, s0
391; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
392; NONEON-NOSVE-NEXT:    tst w8, #0x80
393; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
394; NONEON-NOSVE-NEXT:    fcvt h0, s0
395; NONEON-NOSVE-NEXT:    str h0, [sp, #136]
396; NONEON-NOSVE-NEXT:    ldr h0, [sp, #70]
397; NONEON-NOSVE-NEXT:    fcvt s0, h0
398; NONEON-NOSVE-NEXT:    fabs s0, s0
399; NONEON-NOSVE-NEXT:    fneg s1, s0
400; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
401; NONEON-NOSVE-NEXT:    tst w8, #0x80
402; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
403; NONEON-NOSVE-NEXT:    fcvt h0, s0
404; NONEON-NOSVE-NEXT:    str h0, [sp, #134]
405; NONEON-NOSVE-NEXT:    ldr h0, [sp, #68]
406; NONEON-NOSVE-NEXT:    fcvt s0, h0
407; NONEON-NOSVE-NEXT:    fabs s0, s0
408; NONEON-NOSVE-NEXT:    fneg s1, s0
409; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
410; NONEON-NOSVE-NEXT:    tst w8, #0x80
411; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
412; NONEON-NOSVE-NEXT:    fcvt h0, s0
413; NONEON-NOSVE-NEXT:    str h0, [sp, #132]
414; NONEON-NOSVE-NEXT:    ldr h0, [sp, #66]
415; NONEON-NOSVE-NEXT:    fcvt s0, h0
416; NONEON-NOSVE-NEXT:    fabs s0, s0
417; NONEON-NOSVE-NEXT:    fneg s1, s0
418; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
419; NONEON-NOSVE-NEXT:    tst w8, #0x80
420; NONEON-NOSVE-NEXT:    fcvt h0, s0
421; NONEON-NOSVE-NEXT:    str h0, [sp, #130]
422; NONEON-NOSVE-NEXT:    ldr h0, [sp, #64]
423; NONEON-NOSVE-NEXT:    fcvt s0, h0
424; NONEON-NOSVE-NEXT:    fabs s0, s0
425; NONEON-NOSVE-NEXT:    fneg s1, s0
426; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
427; NONEON-NOSVE-NEXT:    fcvt h0, s0
428; NONEON-NOSVE-NEXT:    str h0, [sp, #128]
429; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
430; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
431; NONEON-NOSVE-NEXT:    add sp, sp, #160
432; NONEON-NOSVE-NEXT:    ret
433  %a = load <16 x half>, ptr %ap
434  %b = load <16 x half>, ptr %bp
435  %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
436  store <16 x half> %r, ptr %ap
437  ret void
438}
439
440;============ f32
441
; copysign on <2 x float> (one 64-bit vector): magnitude from the value at
; %ap, sign from the value at %bp, result stored back through %ap.
; Expected code per RUN configuration:
;  * +sve: AND #0x80000000 on the %bp value, AND #0x7fffffff on the %ap value,
;    then ORR.
;  * +sve2 / +sme: one BSL with a 0x7fffffff splat mask.
;  * no vector features: operands are spilled to the stack; each lane is
;    rebuilt with fabs/fneg/fcsel, the select keyed on a sign-bit test of the
;    %bp lane reloaded as a 32-bit integer.
442define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) {
443; SVE-LABEL: test_copysign_v2f32_v2f32:
444; SVE:       // %bb.0:
445; SVE-NEXT:    ldr d0, [x0]
446; SVE-NEXT:    ldr d1, [x1]
447; SVE-NEXT:    and z1.s, z1.s, #0x80000000
448; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
449; SVE-NEXT:    orr z0.d, z0.d, z1.d
450; SVE-NEXT:    str d0, [x0]
451; SVE-NEXT:    ret
452;
453; SVE2-LABEL: test_copysign_v2f32_v2f32:
454; SVE2:       // %bb.0:
455; SVE2-NEXT:    mov z0.s, #0x7fffffff
456; SVE2-NEXT:    ldr d1, [x0]
457; SVE2-NEXT:    ldr d2, [x1]
458; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
459; SVE2-NEXT:    str d1, [x0]
460; SVE2-NEXT:    ret
461;
462; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32:
463; NONEON-NOSVE:       // %bb.0:
464; NONEON-NOSVE-NEXT:    sub sp, sp, #32
465; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
466; NONEON-NOSVE-NEXT:    ldr d0, [x0]
467; NONEON-NOSVE-NEXT:    ldr d1, [x1]
468; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
469; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
470; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
471; NONEON-NOSVE-NEXT:    fabs s0, s0
472; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
473; NONEON-NOSVE-NEXT:    fneg s1, s0
474; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
475; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
476; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
477; NONEON-NOSVE-NEXT:    fabs s0, s0
478; NONEON-NOSVE-NEXT:    fneg s1, s0
479; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
480; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #24]
481; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
482; NONEON-NOSVE-NEXT:    str d0, [x0]
483; NONEON-NOSVE-NEXT:    add sp, sp, #32
484; NONEON-NOSVE-NEXT:    ret
485  %a = load <2 x float>, ptr %ap
486  %b = load <2 x float>, ptr %bp
487  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
488  store <2 x float> %r, ptr %ap
489  ret void
490}
491
; copysign on <4 x float> (one 128-bit vector): magnitude from the value at
; %ap, sign from the value at %bp, result stored back through %ap.
; Expected code per RUN configuration:
;  * +sve: AND #0x80000000 on the %bp value, AND #0x7fffffff on the %ap value,
;    then ORR.
;  * +sve2 / +sme: one BSL with a 0x7fffffff splat mask.
;  * no vector features: a pre-indexed stp both allocates the 48-byte frame
;    and spills the two operands; the four lanes are then rebuilt with
;    fabs/fneg/fcsel, each select keyed on a sign-bit test of the %bp lane
;    reloaded as a 32-bit integer.
492define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) {
493; SVE-LABEL: test_copysign_v4f32_v4f32:
494; SVE:       // %bb.0:
495; SVE-NEXT:    ldr q0, [x0]
496; SVE-NEXT:    ldr q1, [x1]
497; SVE-NEXT:    and z1.s, z1.s, #0x80000000
498; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
499; SVE-NEXT:    orr z0.d, z0.d, z1.d
500; SVE-NEXT:    str q0, [x0]
501; SVE-NEXT:    ret
502;
503; SVE2-LABEL: test_copysign_v4f32_v4f32:
504; SVE2:       // %bb.0:
505; SVE2-NEXT:    mov z0.s, #0x7fffffff
506; SVE2-NEXT:    ldr q1, [x0]
507; SVE2-NEXT:    ldr q2, [x1]
508; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
509; SVE2-NEXT:    str q1, [x0]
510; SVE2-NEXT:    ret
511;
512; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32:
513; NONEON-NOSVE:       // %bb.0:
514; NONEON-NOSVE-NEXT:    ldr q0, [x0]
515; NONEON-NOSVE-NEXT:    ldr q1, [x1]
516; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
517; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
518; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
519; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
520; NONEON-NOSVE-NEXT:    fabs s0, s0
521; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
522; NONEON-NOSVE-NEXT:    fneg s1, s0
523; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
524; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
525; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
526; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
527; NONEON-NOSVE-NEXT:    fabs s0, s0
528; NONEON-NOSVE-NEXT:    fneg s1, s0
529; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
530; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
531; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #40]
532; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
533; NONEON-NOSVE-NEXT:    fabs s0, s0
534; NONEON-NOSVE-NEXT:    fneg s1, s0
535; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
536; NONEON-NOSVE-NEXT:    ldr s0, [sp]
537; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
538; NONEON-NOSVE-NEXT:    fabs s0, s0
539; NONEON-NOSVE-NEXT:    fneg s1, s0
540; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
541; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #32]
542; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
543; NONEON-NOSVE-NEXT:    str q0, [x0]
544; NONEON-NOSVE-NEXT:    add sp, sp, #48
545; NONEON-NOSVE-NEXT:    ret
546  %a = load <4 x float>, ptr %ap
547  %b = load <4 x float>, ptr %bp
548  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
549  store <4 x float> %r, ptr %ap
550  ret void
551}
552
; copysign on <8 x float> (256 bits, i.e. two 128-bit registers): magnitude
; from the value at %ap, sign from the value at %bp, result stored to %ap.
; Expected code per RUN configuration:
;  * +sve: operands are loaded as q-register pairs and each half gets its own
;    AND #0x80000000 / AND #0x7fffffff / ORR sequence.
;  * +sve2 / +sme: two BSL instructions sharing one 0x7fffffff splat mask.
;  * no vector features: a 96-byte stack frame; the eight lanes are rebuilt
;    with fabs/fneg/fcsel, each select keyed on a sign-bit test of the %bp
;    lane reloaded as a 32-bit integer.
553define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
554; SVE-LABEL: test_copysign_v8f32_v8f32:
555; SVE:       // %bb.0:
556; SVE-NEXT:    ldp q0, q3, [x1]
557; SVE-NEXT:    ldp q1, q2, [x0]
558; SVE-NEXT:    and z0.s, z0.s, #0x80000000
559; SVE-NEXT:    and z3.s, z3.s, #0x80000000
560; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
561; SVE-NEXT:    and z2.s, z2.s, #0x7fffffff
562; SVE-NEXT:    orr z0.d, z1.d, z0.d
563; SVE-NEXT:    orr z1.d, z2.d, z3.d
564; SVE-NEXT:    stp q0, q1, [x0]
565; SVE-NEXT:    ret
566;
567; SVE2-LABEL: test_copysign_v8f32_v8f32:
568; SVE2:       // %bb.0:
569; SVE2-NEXT:    mov z0.s, #0x7fffffff
570; SVE2-NEXT:    ldp q1, q4, [x1]
571; SVE2-NEXT:    ldp q2, q3, [x0]
572; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
573; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
574; SVE2-NEXT:    stp q2, q3, [x0]
575; SVE2-NEXT:    ret
576;
577; NONEON-NOSVE-LABEL: test_copysign_v8f32_v8f32:
578; NONEON-NOSVE:       // %bb.0:
579; NONEON-NOSVE-NEXT:    sub sp, sp, #96
580; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
581; NONEON-NOSVE-NEXT:    ldp q2, q0, [x0]
582; NONEON-NOSVE-NEXT:    ldp q3, q1, [x1]
583; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
584; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #32]
585; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
586; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
587; NONEON-NOSVE-NEXT:    fabs s0, s0
588; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
589; NONEON-NOSVE-NEXT:    fneg s1, s0
590; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
591; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
592; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
593; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #48]
594; NONEON-NOSVE-NEXT:    fabs s0, s0
595; NONEON-NOSVE-NEXT:    fneg s1, s0
596; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
597; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
598; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #88]
599; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
600; NONEON-NOSVE-NEXT:    fabs s0, s0
601; NONEON-NOSVE-NEXT:    fneg s1, s0
602; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
603; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
604; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
605; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
606; NONEON-NOSVE-NEXT:    fabs s0, s0
607; NONEON-NOSVE-NEXT:    fneg s1, s0
608; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
609; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
610; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #80]
611; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
612; NONEON-NOSVE-NEXT:    fabs s0, s0
613; NONEON-NOSVE-NEXT:    fneg s1, s0
614; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
615; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
616; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
617; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
618; NONEON-NOSVE-NEXT:    fabs s0, s0
619; NONEON-NOSVE-NEXT:    fneg s1, s0
620; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
621; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
622; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #72]
623; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
624; NONEON-NOSVE-NEXT:    fabs s0, s0
625; NONEON-NOSVE-NEXT:    fneg s1, s0
626; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
627; NONEON-NOSVE-NEXT:    ldr s0, [sp]
628; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
629; NONEON-NOSVE-NEXT:    fabs s0, s0
630; NONEON-NOSVE-NEXT:    fneg s1, s0
631; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
632; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #64]
633; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
634; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
635; NONEON-NOSVE-NEXT:    add sp, sp, #96
636; NONEON-NOSVE-NEXT:    ret
637  %a = load <8 x float>, ptr %ap
638  %b = load <8 x float>, ptr %bp
639  %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
640  store <8 x float> %r, ptr %ap
641  ret void
642}
643
644;============ f64
645
; copysign on <2 x double> (one 128-bit vector): magnitude from the value at
; %ap, sign from the value at %bp, result stored back through %ap.
; Expected code per RUN configuration:
;  * +sve: AND #0x8000000000000000 on the %bp value, AND #0x7fffffffffffffff
;    on the %ap value, then ORR.
;  * +sve2 / +sme: one BSL with a 0x7fffffffffffffff splat mask.
;  * no vector features: a pre-indexed stp both allocates the 48-byte frame
;    and spills the operands; both lanes are rebuilt with fabs/fneg/fcsel,
;    each select keyed on a sign-bit test of the %bp lane reloaded as a
;    64-bit integer.
646define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) {
647; SVE-LABEL: test_copysign_v2f64_v2f64:
648; SVE:       // %bb.0:
649; SVE-NEXT:    ldr q0, [x0]
650; SVE-NEXT:    ldr q1, [x1]
651; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
652; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
653; SVE-NEXT:    orr z0.d, z0.d, z1.d
654; SVE-NEXT:    str q0, [x0]
655; SVE-NEXT:    ret
656;
657; SVE2-LABEL: test_copysign_v2f64_v2f64:
658; SVE2:       // %bb.0:
659; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
660; SVE2-NEXT:    ldr q1, [x0]
661; SVE2-NEXT:    ldr q2, [x1]
662; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
663; SVE2-NEXT:    str q1, [x0]
664; SVE2-NEXT:    ret
665;
666; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64:
667; NONEON-NOSVE:       // %bb.0:
668; NONEON-NOSVE-NEXT:    ldr q0, [x0]
669; NONEON-NOSVE-NEXT:    ldr q1, [x1]
670; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
671; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
672; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
673; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
674; NONEON-NOSVE-NEXT:    fabs d0, d0
675; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
676; NONEON-NOSVE-NEXT:    fneg d1, d0
677; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
678; NONEON-NOSVE-NEXT:    ldr d0, [sp]
679; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
680; NONEON-NOSVE-NEXT:    fabs d0, d0
681; NONEON-NOSVE-NEXT:    fneg d1, d0
682; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
683; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #32]
684; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
685; NONEON-NOSVE-NEXT:    str q0, [x0]
686; NONEON-NOSVE-NEXT:    add sp, sp, #48
687; NONEON-NOSVE-NEXT:    ret
688  %a = load <2 x double>, ptr %ap
689  %b = load <2 x double>, ptr %bp
690  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
691  store <2 x double> %r, ptr %ap
692  ret void
693}
694
; copysign on <4 x double> (256 bits, i.e. two 128-bit registers): magnitude
; from the value at %ap, sign from the value at %bp, result stored to %ap.
; Expected code per RUN configuration:
;  * +sve: operands are loaded as q-register pairs and each half gets its own
;    AND #0x8000000000000000 / AND #0x7fffffffffffffff / ORR sequence.
;  * +sve2 / +sme: two BSL instructions sharing one 0x7fffffffffffffff mask.
;  * no vector features: a 96-byte stack frame; the four lanes are rebuilt
;    with fabs/fneg/fcsel, each select keyed on a sign-bit test of the %bp
;    lane reloaded as a 64-bit integer.
695define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
696; SVE-LABEL: test_copysign_v4f64_v4f64:
697; SVE:       // %bb.0:
698; SVE-NEXT:    ldp q0, q3, [x1]
699; SVE-NEXT:    ldp q1, q2, [x0]
700; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
701; SVE-NEXT:    and z3.d, z3.d, #0x8000000000000000
702; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
703; SVE-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
704; SVE-NEXT:    orr z0.d, z1.d, z0.d
705; SVE-NEXT:    orr z1.d, z2.d, z3.d
706; SVE-NEXT:    stp q0, q1, [x0]
707; SVE-NEXT:    ret
708;
709; SVE2-LABEL: test_copysign_v4f64_v4f64:
710; SVE2:       // %bb.0:
711; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
712; SVE2-NEXT:    ldp q1, q4, [x1]
713; SVE2-NEXT:    ldp q2, q3, [x0]
714; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
715; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
716; SVE2-NEXT:    stp q2, q3, [x0]
717; SVE2-NEXT:    ret
718;
719; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64:
720; NONEON-NOSVE:       // %bb.0:
721; NONEON-NOSVE-NEXT:    sub sp, sp, #96
722; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
723; NONEON-NOSVE-NEXT:    ldp q2, q0, [x0]
724; NONEON-NOSVE-NEXT:    ldp q3, q1, [x1]
725; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
726; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #32]
727; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
728; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #48]
729; NONEON-NOSVE-NEXT:    fabs d0, d0
730; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
731; NONEON-NOSVE-NEXT:    fneg d1, d0
732; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
733; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
734; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
735; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
736; NONEON-NOSVE-NEXT:    fabs d0, d0
737; NONEON-NOSVE-NEXT:    fneg d1, d0
738; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
739; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
740; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #80]
741; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
742; NONEON-NOSVE-NEXT:    fabs d0, d0
743; NONEON-NOSVE-NEXT:    fneg d1, d0
744; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
745; NONEON-NOSVE-NEXT:    ldr d0, [sp]
746; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
747; NONEON-NOSVE-NEXT:    fabs d0, d0
748; NONEON-NOSVE-NEXT:    fneg d1, d0
749; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
750; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #64]
751; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
752; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
753; NONEON-NOSVE-NEXT:    add sp, sp, #96
754; NONEON-NOSVE-NEXT:    ret
755  %a = load <4 x double>, ptr %ap
756  %b = load <4 x double>, ptr %bp
757  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
758  store <4 x double> %r, ptr %ap
759  ret void
760}
761
762;============ v2f32
763
; Mixed-width copysign: the magnitude is a <2 x float> loaded from %ap, while
; the sign operand is a <2 x double> loaded from %bp and narrowed with fptrunc
; before the <2 x float> copysign call. The result is stored back through %ap.
; Expected code per RUN configuration:
;  * +sve: the %bp value is narrowed with a predicated fcvt followed by uzp1,
;    then combined with the usual AND #0x80000000 / AND #0x7fffffff / ORR.
;  * +sve2 / +sme: the same fcvt + uzp1 narrowing, followed by a single BSL
;    with a 0x7fffffff splat mask.
;  * no vector features: no conversion is emitted at all; the sign bit is
;    tested directly on each 64-bit %bp lane (tst x, #0x8000000000000000) and
;    drives the fabs/fneg/fcsel sequence on the float lanes.
764define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
765; SVE-LABEL: test_copysign_v2f32_v2f64:
766; SVE:       // %bb.0:
767; SVE-NEXT:    ptrue p0.d
768; SVE-NEXT:    ldr q0, [x1]
769; SVE-NEXT:    ldr d1, [x0]
770; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
771; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
772; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
773; SVE-NEXT:    and z0.s, z0.s, #0x80000000
774; SVE-NEXT:    orr z0.d, z1.d, z0.d
775; SVE-NEXT:    str d0, [x0]
776; SVE-NEXT:    ret
777;
778; SVE2-LABEL: test_copysign_v2f32_v2f64:
779; SVE2:       // %bb.0:
780; SVE2-NEXT:    ptrue p0.d
781; SVE2-NEXT:    ldr q0, [x1]
782; SVE2-NEXT:    mov z1.s, #0x7fffffff
783; SVE2-NEXT:    ldr d2, [x0]
784; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
785; SVE2-NEXT:    uzp1 z0.s, z0.s, z0.s
786; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
787; SVE2-NEXT:    str d2, [x0]
788; SVE2-NEXT:    ret
789;
790; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64:
791; NONEON-NOSVE:       // %bb.0:
792; NONEON-NOSVE-NEXT:    sub sp, sp, #48
793; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
794; NONEON-NOSVE-NEXT:    ldr d1, [x0]
795; NONEON-NOSVE-NEXT:    ldr q0, [x1]
796; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
797; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
798; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
799; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
800; NONEON-NOSVE-NEXT:    fabs s0, s0
801; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
802; NONEON-NOSVE-NEXT:    fneg s1, s0
803; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
804; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
805; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
806; NONEON-NOSVE-NEXT:    fabs s0, s0
807; NONEON-NOSVE-NEXT:    fneg s1, s0
808; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
809; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #40]
810; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
811; NONEON-NOSVE-NEXT:    str d0, [x0]
812; NONEON-NOSVE-NEXT:    add sp, sp, #48
813; NONEON-NOSVE-NEXT:    ret
814  %a = load <2 x float>, ptr %ap
815  %b = load <2 x double>, ptr %bp
816  %tmp0 = fptrunc <2 x double> %b to <2 x float>
817  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
818  store <2 x float> %r, ptr %ap
819  ret void
820}
821
822;============ v4f32
823
824; SplitVecOp #1
; copysign(<4 x float> %a, fptrunc(<4 x double> %b)): the sign operand
; (operand #1 of the copysign) starts as a 256-bit <4 x double>; the checks
; below show it being loaded as two q registers (ldp) and converted half by
; half. The result is stored back through %ap.
; NOTE(review): "SplitVecOp" presumably names the type-legalizer path that
; splits a vector operand -- confirm against the legalizer source.
; Expected code per the checks below:
;  * +sve: both halves are narrowed (fcvt + uzp1), joined with splice, then the
;    and/and/orr mask sequence is applied.
;  * +sve2 / +sme: the same narrowing, the two-register splice form, then bsl.
;  * no-NEON/no-SVE configuration: scalarised through the stack; bit 63 of each
;    original double lane drives the fabs/fneg/fcsel selection.
825define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
826; SVE-LABEL: test_copysign_v4f32_v4f64:
827; SVE:       // %bb.0:
828; SVE-NEXT:    ldp q0, q1, [x1]
829; SVE-NEXT:    ptrue p0.d
830; SVE-NEXT:    fcvt z1.s, p0/m, z1.d
831; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
832; SVE-NEXT:    ptrue p0.s, vl2
833; SVE-NEXT:    uzp1 z1.s, z1.s, z1.s
834; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
835; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
836; SVE-NEXT:    ldr q1, [x0]
837; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
838; SVE-NEXT:    and z0.s, z0.s, #0x80000000
839; SVE-NEXT:    orr z0.d, z1.d, z0.d
840; SVE-NEXT:    str q0, [x0]
841; SVE-NEXT:    ret
842;
843; SVE2-LABEL: test_copysign_v4f32_v4f64:
844; SVE2:       // %bb.0:
845; SVE2-NEXT:    ldp q1, q0, [x1]
846; SVE2-NEXT:    ptrue p0.d
847; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
848; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
849; SVE2-NEXT:    ptrue p0.s, vl2
850; SVE2-NEXT:    uzp1 z3.s, z0.s, z0.s
851; SVE2-NEXT:    uzp1 z2.s, z1.s, z1.s
852; SVE2-NEXT:    mov z1.s, #0x7fffffff
853; SVE2-NEXT:    splice z0.s, p0, { z2.s, z3.s }
854; SVE2-NEXT:    ldr q2, [x0]
855; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
856; SVE2-NEXT:    str q2, [x0]
857; SVE2-NEXT:    ret
858;
859; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64:
860; NONEON-NOSVE:       // %bb.0:
861; NONEON-NOSVE-NEXT:    sub sp, sp, #64
862; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
863; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
864; NONEON-NOSVE-NEXT:    ldr q2, [x0]
865; NONEON-NOSVE-NEXT:    str q2, [sp]
866; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
867; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
868; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #32]
869; NONEON-NOSVE-NEXT:    fabs s0, s0
870; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
871; NONEON-NOSVE-NEXT:    fneg s1, s0
872; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
873; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
874; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
875; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
876; NONEON-NOSVE-NEXT:    fabs s0, s0
877; NONEON-NOSVE-NEXT:    fneg s1, s0
878; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
879; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
880; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #56]
881; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
882; NONEON-NOSVE-NEXT:    fabs s0, s0
883; NONEON-NOSVE-NEXT:    fneg s1, s0
884; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
885; NONEON-NOSVE-NEXT:    ldr s0, [sp]
886; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
887; NONEON-NOSVE-NEXT:    fabs s0, s0
888; NONEON-NOSVE-NEXT:    fneg s1, s0
889; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
890; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #48]
891; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
892; NONEON-NOSVE-NEXT:    str q0, [x0]
893; NONEON-NOSVE-NEXT:    add sp, sp, #64
894; NONEON-NOSVE-NEXT:    ret
895  %a = load <4 x float>, ptr %ap
896  %b = load <4 x double>, ptr %bp
897  %tmp0 = fptrunc <4 x double> %b to <4 x float>
898  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
899  store <4 x float> %r, ptr %ap
900  ret void
901}
902
903;============ v2f64
904
; copysign(<2 x double> %a, fpext(<2 x float> %b)): the sign source is loaded
; as <2 x float> and widened to <2 x double> before the copysign; the result
; is stored back through %ap.
; Expected code per the checks below:
;  * +sve: the floats are loaded into 64-bit containers (ld1w { z.d }),
;    widened with a predicated fcvt, then the 64-bit and/and/orr mask sequence
;    (#0x7fffffffffffffff / #0x8000000000000000) is applied.
;  * +sve2 / +sme: the same load and widening, then one bsl with a
;    0x7fffffffffffffff mask.
;  * no-NEON/no-SVE configuration: scalarised through the stack; bit 31 of each
;    original float lane (not the widened value) is tested.
905define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) {
906; SVE-LABEL: test_copysign_v2f64_v2f32:
907; SVE:       // %bb.0:
908; SVE-NEXT:    ptrue p0.d, vl2
909; SVE-NEXT:    ldr q0, [x0]
910; SVE-NEXT:    ld1w { z1.d }, p0/z, [x1]
911; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
912; SVE-NEXT:    fcvt z1.d, p0/m, z1.s
913; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
914; SVE-NEXT:    orr z0.d, z0.d, z1.d
915; SVE-NEXT:    str q0, [x0]
916; SVE-NEXT:    ret
917;
918; SVE2-LABEL: test_copysign_v2f64_v2f32:
919; SVE2:       // %bb.0:
920; SVE2-NEXT:    ptrue p0.d, vl2
921; SVE2-NEXT:    mov z1.d, #0x7fffffffffffffff
922; SVE2-NEXT:    ldr q2, [x0]
923; SVE2-NEXT:    ld1w { z0.d }, p0/z, [x1]
924; SVE2-NEXT:    fcvt z0.d, p0/m, z0.s
925; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
926; SVE2-NEXT:    str q2, [x0]
927; SVE2-NEXT:    ret
928;
929; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32:
930; NONEON-NOSVE:       // %bb.0:
931; NONEON-NOSVE-NEXT:    sub sp, sp, #48
932; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
933; NONEON-NOSVE-NEXT:    ldr q1, [x0]
934; NONEON-NOSVE-NEXT:    ldr d0, [x1]
935; NONEON-NOSVE-NEXT:    str q1, [sp]
936; NONEON-NOSVE-NEXT:    str d0, [sp, #24]
937; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
938; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
939; NONEON-NOSVE-NEXT:    fabs d0, d0
940; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
941; NONEON-NOSVE-NEXT:    fneg d1, d0
942; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
943; NONEON-NOSVE-NEXT:    ldr d0, [sp]
944; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
945; NONEON-NOSVE-NEXT:    fabs d0, d0
946; NONEON-NOSVE-NEXT:    fneg d1, d0
947; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
948; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #32]
949; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
950; NONEON-NOSVE-NEXT:    str q0, [x0]
951; NONEON-NOSVE-NEXT:    add sp, sp, #48
952; NONEON-NOSVE-NEXT:    ret
953  %a = load <2 x double>, ptr %ap
954  %b = load < 2 x float>, ptr %bp
955  %tmp0 = fpext <2 x float> %b to <2 x double>
956  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
957  store <2 x double> %r, ptr %ap
958  ret void
959}
960
961;============ v4f64
962
963; SplitVecRes mismatched
; copysign(<4 x double> %a, fpext(<4 x float> %b)): the 256-bit <4 x double>
; result is produced and stored as two q registers (stp), while the sign
; source is loaded as a 128-bit <4 x float> and widened with fpext.
; NOTE(review): "SplitVecRes mismatched" presumably refers to the
; type-legalizer splitting the result while the operand types differ in
; width -- confirm against the legalizer source.
; Expected code per the checks below:
;  * +sve: each pair of floats is loaded into 64-bit containers with ld1w (the
;    second pair at element offset 2 via x8), widened with fcvt, then each
;    half gets the 64-bit and/and/orr mask sequence.
;  * +sve2 / +sme: the same loads and widening, then one bsl per half sharing
;    a 0x7fffffffffffffff mask.
;  * no-NEON/no-SVE configuration: scalarised through the stack; bit 31 of each
;    original float lane is tested.
964define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
965; SVE-LABEL: test_copysign_v4f64_v4f32:
966; SVE:       // %bb.0:
967; SVE-NEXT:    ptrue p0.d, vl2
968; SVE-NEXT:    mov x8, #2 // =0x2
969; SVE-NEXT:    ldp q2, q3, [x0]
970; SVE-NEXT:    ld1w { z0.d }, p0/z, [x1]
971; SVE-NEXT:    ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
972; SVE-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
973; SVE-NEXT:    and z3.d, z3.d, #0x7fffffffffffffff
974; SVE-NEXT:    fcvt z0.d, p0/m, z0.s
975; SVE-NEXT:    fcvt z1.d, p0/m, z1.s
976; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
977; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
978; SVE-NEXT:    orr z0.d, z2.d, z0.d
979; SVE-NEXT:    orr z1.d, z3.d, z1.d
980; SVE-NEXT:    stp q0, q1, [x0]
981; SVE-NEXT:    ret
982;
983; SVE2-LABEL: test_copysign_v4f64_v4f32:
984; SVE2:       // %bb.0:
985; SVE2-NEXT:    ptrue p0.d, vl2
986; SVE2-NEXT:    mov x8, #2 // =0x2
987; SVE2-NEXT:    mov z2.d, #0x7fffffffffffffff
988; SVE2-NEXT:    ldp q3, q4, [x0]
989; SVE2-NEXT:    ld1w { z0.d }, p0/z, [x1]
990; SVE2-NEXT:    ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
991; SVE2-NEXT:    fcvt z0.d, p0/m, z0.s
992; SVE2-NEXT:    fcvt z1.d, p0/m, z1.s
993; SVE2-NEXT:    bsl z3.d, z3.d, z0.d, z2.d
994; SVE2-NEXT:    bsl z4.d, z4.d, z1.d, z2.d
995; SVE2-NEXT:    stp q3, q4, [x0]
996; SVE2-NEXT:    ret
997;
998; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32:
999; NONEON-NOSVE:       // %bb.0:
1000; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1001; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1002; NONEON-NOSVE-NEXT:    ldp q1, q2, [x0]
1003; NONEON-NOSVE-NEXT:    ldr q0, [x1]
1004; NONEON-NOSVE-NEXT:    stp q0, q2, [sp, #16]
1005; NONEON-NOSVE-NEXT:    ldp d0, d2, [sp, #16]
1006; NONEON-NOSVE-NEXT:    str q1, [sp]
1007; NONEON-NOSVE-NEXT:    stp d2, d0, [sp, #48]
1008; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
1009; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #48]
1010; NONEON-NOSVE-NEXT:    fabs d0, d0
1011; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1012; NONEON-NOSVE-NEXT:    fneg d1, d0
1013; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
1014; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
1015; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1016; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
1017; NONEON-NOSVE-NEXT:    fabs d0, d0
1018; NONEON-NOSVE-NEXT:    fneg d1, d0
1019; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
1020; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1021; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #64]
1022; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
1023; NONEON-NOSVE-NEXT:    fabs d0, d0
1024; NONEON-NOSVE-NEXT:    fneg d1, d0
1025; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
1026; NONEON-NOSVE-NEXT:    ldr d0, [sp]
1027; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1028; NONEON-NOSVE-NEXT:    fabs d0, d0
1029; NONEON-NOSVE-NEXT:    fneg d1, d0
1030; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
1031; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #80]
1032; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
1033; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1034; NONEON-NOSVE-NEXT:    add sp, sp, #96
1035; NONEON-NOSVE-NEXT:    ret
1036  %a = load <4 x double>, ptr %ap
1037  %b = load <4 x float>, ptr %bp
1038  %tmp0 = fpext <4 x float> %b to <4 x double>
1039  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
1040  store <4 x double> %r, ptr %ap
1041  ret void
1042}
1043
1044;============ v4f16
1045
; copysign(<4 x half> %a, fptrunc(<4 x float> %b)): the sign source is loaded
; as <4 x float> and narrowed to <4 x half> before the copysign; the result is
; stored back through %ap.
; Expected code per the checks below:
;  * +sve: predicated fcvt to .h plus uzp1 narrows the sign vector, then the
;    16-bit and/and/orr mask sequence (#0x7fff / #0x8000) is applied.
;  * +sve2 / +sme: the same narrowing, then one bsl with a 0x7fff mask.
;  * no-NEON/no-SVE configuration: scalarised through the stack; each half lane
;    is widened to single precision (fcvt s, h), its sign is chosen from
;    bit 31 of the matching original float lane via fabs/fneg/fcsel, and the
;    value is narrowed back (fcvt h, s).
1046define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
1047; SVE-LABEL: test_copysign_v4f16_v4f32:
1048; SVE:       // %bb.0:
1049; SVE-NEXT:    ptrue p0.s
1050; SVE-NEXT:    ldr q0, [x1]
1051; SVE-NEXT:    ldr d1, [x0]
1052; SVE-NEXT:    fcvt z0.h, p0/m, z0.s
1053; SVE-NEXT:    and z1.h, z1.h, #0x7fff
1054; SVE-NEXT:    uzp1 z0.h, z0.h, z0.h
1055; SVE-NEXT:    and z0.h, z0.h, #0x8000
1056; SVE-NEXT:    orr z0.d, z1.d, z0.d
1057; SVE-NEXT:    str d0, [x0]
1058; SVE-NEXT:    ret
1059;
1060; SVE2-LABEL: test_copysign_v4f16_v4f32:
1061; SVE2:       // %bb.0:
1062; SVE2-NEXT:    ptrue p0.s
1063; SVE2-NEXT:    ldr q0, [x1]
1064; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
1065; SVE2-NEXT:    ldr d2, [x0]
1066; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
1067; SVE2-NEXT:    uzp1 z0.h, z0.h, z0.h
1068; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
1069; SVE2-NEXT:    str d2, [x0]
1070; SVE2-NEXT:    ret
1071;
1072; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32:
1073; NONEON-NOSVE:       // %bb.0:
1074; NONEON-NOSVE-NEXT:    sub sp, sp, #48
1075; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1076; NONEON-NOSVE-NEXT:    ldr d1, [x0]
1077; NONEON-NOSVE-NEXT:    ldr q0, [x1]
1078; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
1079; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
1080; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
1081; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
1082; NONEON-NOSVE-NEXT:    fcvt s0, h0
1083; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1084; NONEON-NOSVE-NEXT:    fabs s0, s0
1085; NONEON-NOSVE-NEXT:    fneg s1, s0
1086; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1087; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1088; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
1089; NONEON-NOSVE-NEXT:    fcvt h0, s0
1090; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
1091; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
1092; NONEON-NOSVE-NEXT:    fcvt s0, h0
1093; NONEON-NOSVE-NEXT:    fabs s0, s0
1094; NONEON-NOSVE-NEXT:    fneg s1, s0
1095; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1096; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1097; NONEON-NOSVE-NEXT:    fcvt h0, s0
1098; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
1099; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
1100; NONEON-NOSVE-NEXT:    fcvt s0, h0
1101; NONEON-NOSVE-NEXT:    fabs s0, s0
1102; NONEON-NOSVE-NEXT:    fneg s1, s0
1103; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1104; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1105; NONEON-NOSVE-NEXT:    fcvt h0, s0
1106; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
1107; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1108; NONEON-NOSVE-NEXT:    fcvt s0, h0
1109; NONEON-NOSVE-NEXT:    fabs s0, s0
1110; NONEON-NOSVE-NEXT:    fneg s1, s0
1111; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1112; NONEON-NOSVE-NEXT:    fcvt h0, s0
1113; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
1114; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
1115; NONEON-NOSVE-NEXT:    str d0, [x0]
1116; NONEON-NOSVE-NEXT:    add sp, sp, #48
1117; NONEON-NOSVE-NEXT:    ret
1118  %a = load <4 x half>, ptr %ap
1119  %b = load <4 x float>, ptr %bp
1120  %tmp0 = fptrunc <4 x float> %b to <4 x half>
1121  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
1122  store <4 x half> %r, ptr %ap
1123  ret void
1124}
1125
; copysign(<4 x half> %a, fptrunc(<4 x double> %b)): the sign source is a
; 256-bit <4 x double> narrowed straight to <4 x half>; the result is stored
; back through %ap.
; Expected code per the checks below:
;  * +sve: no predicated vector fcvt is used for the double-to-half narrowing;
;    the high lane of each q register is extracted (mov z.d, z.d[1]), every
;    lane is converted with a scalar fcvt h, d, and the four halves are
;    reassembled with zip1 before the 16-bit and/and/orr mask sequence.
;  * +sve2 / +sme: the same per-lane conversion and zip1 reassembly, then one
;    bsl with a 0x7fff mask.
;  * no-NEON/no-SVE configuration: scalarised through the stack; each half lane
;    is widened to single precision, its sign is chosen from bit 63 of the
;    matching original double lane, and the value is narrowed back to half.
1126define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
1127; SVE-LABEL: test_copysign_v4f16_v4f64:
1128; SVE:       // %bb.0:
1129; SVE-NEXT:    ldp q0, q1, [x1]
1130; SVE-NEXT:    mov z2.d, z1.d[1]
1131; SVE-NEXT:    mov z3.d, z0.d[1]
1132; SVE-NEXT:    fcvt h1, d1
1133; SVE-NEXT:    fcvt h0, d0
1134; SVE-NEXT:    fcvt h2, d2
1135; SVE-NEXT:    fcvt h3, d3
1136; SVE-NEXT:    zip1 z1.h, z1.h, z2.h
1137; SVE-NEXT:    zip1 z0.h, z0.h, z3.h
1138; SVE-NEXT:    zip1 z0.s, z0.s, z1.s
1139; SVE-NEXT:    ldr d1, [x0]
1140; SVE-NEXT:    and z1.h, z1.h, #0x7fff
1141; SVE-NEXT:    and z0.h, z0.h, #0x8000
1142; SVE-NEXT:    orr z0.d, z1.d, z0.d
1143; SVE-NEXT:    str d0, [x0]
1144; SVE-NEXT:    ret
1145;
1146; SVE2-LABEL: test_copysign_v4f16_v4f64:
1147; SVE2:       // %bb.0:
1148; SVE2-NEXT:    ldp q0, q1, [x1]
1149; SVE2-NEXT:    mov z2.d, z1.d[1]
1150; SVE2-NEXT:    mov z3.d, z0.d[1]
1151; SVE2-NEXT:    fcvt h1, d1
1152; SVE2-NEXT:    fcvt h0, d0
1153; SVE2-NEXT:    fcvt h2, d2
1154; SVE2-NEXT:    fcvt h3, d3
1155; SVE2-NEXT:    zip1 z1.h, z1.h, z2.h
1156; SVE2-NEXT:    zip1 z0.h, z0.h, z3.h
1157; SVE2-NEXT:    mov z2.h, #32767 // =0x7fff
1158; SVE2-NEXT:    zip1 z0.s, z0.s, z1.s
1159; SVE2-NEXT:    ldr d1, [x0]
1160; SVE2-NEXT:    bsl z1.d, z1.d, z0.d, z2.d
1161; SVE2-NEXT:    str d1, [x0]
1162; SVE2-NEXT:    ret
1163;
1164; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64:
1165; NONEON-NOSVE:       // %bb.0:
1166; NONEON-NOSVE-NEXT:    sub sp, sp, #64
1167; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
1168; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
1169; NONEON-NOSVE-NEXT:    ldr d2, [x0]
1170; NONEON-NOSVE-NEXT:    str d2, [sp, #8]
1171; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
1172; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
1173; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #32]
1174; NONEON-NOSVE-NEXT:    fcvt s0, h0
1175; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
1176; NONEON-NOSVE-NEXT:    fabs s0, s0
1177; NONEON-NOSVE-NEXT:    fneg s1, s0
1178; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1179; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
1180; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
1181; NONEON-NOSVE-NEXT:    fcvt h0, s0
1182; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
1183; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
1184; NONEON-NOSVE-NEXT:    fcvt s0, h0
1185; NONEON-NOSVE-NEXT:    fabs s0, s0
1186; NONEON-NOSVE-NEXT:    fneg s1, s0
1187; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1188; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
1189; NONEON-NOSVE-NEXT:    fcvt h0, s0
1190; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
1191; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
1192; NONEON-NOSVE-NEXT:    fcvt s0, h0
1193; NONEON-NOSVE-NEXT:    fabs s0, s0
1194; NONEON-NOSVE-NEXT:    fneg s1, s0
1195; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1196; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
1197; NONEON-NOSVE-NEXT:    fcvt h0, s0
1198; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
1199; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1200; NONEON-NOSVE-NEXT:    fcvt s0, h0
1201; NONEON-NOSVE-NEXT:    fabs s0, s0
1202; NONEON-NOSVE-NEXT:    fneg s1, s0
1203; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1204; NONEON-NOSVE-NEXT:    fcvt h0, s0
1205; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
1206; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
1207; NONEON-NOSVE-NEXT:    str d0, [x0]
1208; NONEON-NOSVE-NEXT:    add sp, sp, #64
1209; NONEON-NOSVE-NEXT:    ret
1210  %a = load <4 x half>, ptr %ap
1211  %b = load <4 x double>, ptr %bp
1212  %tmp0 = fptrunc <4 x double> %b to <4 x half>
1213  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
1214  store <4 x half> %r, ptr %ap
1215  ret void
1216}
1217
1218;============ v8f16
1219
; copysign(<8 x half> %a, fptrunc(<8 x float> %b)): the sign source is a
; 256-bit <8 x float>, loaded as two q registers and narrowed to <8 x half>;
; the result is stored back through %ap.
; Expected code per the checks below:
;  * +sve: each half is narrowed with a predicated fcvt to .h plus uzp1, the
;    halves are joined with splice (p0.h, vl4), then the 16-bit and/and/orr
;    mask sequence (#0x7fff / #0x8000) is applied.
;  * +sve2 / +sme: the same narrowing, the two-register splice form, then one
;    bsl with a 0x7fff mask.
;  * no-NEON/no-SVE configuration: scalarised through the stack, one half lane
;    at a time; each lane is widened to single precision, its sign is chosen
;    from bit 31 of the matching original float lane via fabs/fneg/fcsel, and
;    the value is narrowed back to half.
1220define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
1221; SVE-LABEL: test_copysign_v8f16_v8f32:
1222; SVE:       // %bb.0:
1223; SVE-NEXT:    ldp q0, q1, [x1]
1224; SVE-NEXT:    ptrue p0.s
1225; SVE-NEXT:    fcvt z1.h, p0/m, z1.s
1226; SVE-NEXT:    fcvt z0.h, p0/m, z0.s
1227; SVE-NEXT:    ptrue p0.h, vl4
1228; SVE-NEXT:    uzp1 z1.h, z1.h, z1.h
1229; SVE-NEXT:    uzp1 z0.h, z0.h, z0.h
1230; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
1231; SVE-NEXT:    ldr q1, [x0]
1232; SVE-NEXT:    and z1.h, z1.h, #0x7fff
1233; SVE-NEXT:    and z0.h, z0.h, #0x8000
1234; SVE-NEXT:    orr z0.d, z1.d, z0.d
1235; SVE-NEXT:    str q0, [x0]
1236; SVE-NEXT:    ret
1237;
1238; SVE2-LABEL: test_copysign_v8f16_v8f32:
1239; SVE2:       // %bb.0:
1240; SVE2-NEXT:    ldp q1, q0, [x1]
1241; SVE2-NEXT:    ptrue p0.s
1242; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
1243; SVE2-NEXT:    fcvt z1.h, p0/m, z1.s
1244; SVE2-NEXT:    ptrue p0.h, vl4
1245; SVE2-NEXT:    uzp1 z3.h, z0.h, z0.h
1246; SVE2-NEXT:    uzp1 z2.h, z1.h, z1.h
1247; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
1248; SVE2-NEXT:    splice z0.h, p0, { z2.h, z3.h }
1249; SVE2-NEXT:    ldr q2, [x0]
1250; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
1251; SVE2-NEXT:    str q2, [x0]
1252; SVE2-NEXT:    ret
1253;
1254; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32:
1255; NONEON-NOSVE:       // %bb.0:
1256; NONEON-NOSVE-NEXT:    sub sp, sp, #64
1257; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
1258; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
1259; NONEON-NOSVE-NEXT:    ldr q2, [x0]
1260; NONEON-NOSVE-NEXT:    str q2, [sp]
1261; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
1262; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
1263; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
1264; NONEON-NOSVE-NEXT:    fcvt s0, h0
1265; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1266; NONEON-NOSVE-NEXT:    fabs s0, s0
1267; NONEON-NOSVE-NEXT:    fneg s1, s0
1268; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1269; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1270; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
1271; NONEON-NOSVE-NEXT:    fcvt h0, s0
1272; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
1273; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
1274; NONEON-NOSVE-NEXT:    fcvt s0, h0
1275; NONEON-NOSVE-NEXT:    fabs s0, s0
1276; NONEON-NOSVE-NEXT:    fneg s1, s0
1277; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1278; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1279; NONEON-NOSVE-NEXT:    fcvt h0, s0
1280; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
1281; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
1282; NONEON-NOSVE-NEXT:    fcvt s0, h0
1283; NONEON-NOSVE-NEXT:    fabs s0, s0
1284; NONEON-NOSVE-NEXT:    fneg s1, s0
1285; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1286; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1287; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
1288; NONEON-NOSVE-NEXT:    fcvt h0, s0
1289; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
1290; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
1291; NONEON-NOSVE-NEXT:    fcvt s0, h0
1292; NONEON-NOSVE-NEXT:    fabs s0, s0
1293; NONEON-NOSVE-NEXT:    fneg s1, s0
1294; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1295; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1296; NONEON-NOSVE-NEXT:    fcvt h0, s0
1297; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
1298; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
1299; NONEON-NOSVE-NEXT:    fcvt s0, h0
1300; NONEON-NOSVE-NEXT:    fabs s0, s0
1301; NONEON-NOSVE-NEXT:    fneg s1, s0
1302; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1303; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1304; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
1305; NONEON-NOSVE-NEXT:    fcvt h0, s0
1306; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
1307; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
1308; NONEON-NOSVE-NEXT:    fcvt s0, h0
1309; NONEON-NOSVE-NEXT:    fabs s0, s0
1310; NONEON-NOSVE-NEXT:    fneg s1, s0
1311; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1312; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
1313; NONEON-NOSVE-NEXT:    fcvt h0, s0
1314; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
1315; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
1316; NONEON-NOSVE-NEXT:    fcvt s0, h0
1317; NONEON-NOSVE-NEXT:    fabs s0, s0
1318; NONEON-NOSVE-NEXT:    fneg s1, s0
1319; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1320; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
1321; NONEON-NOSVE-NEXT:    fcvt h0, s0
1322; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
1323; NONEON-NOSVE-NEXT:    ldr h0, [sp]
1324; NONEON-NOSVE-NEXT:    fcvt s0, h0
1325; NONEON-NOSVE-NEXT:    fabs s0, s0
1326; NONEON-NOSVE-NEXT:    fneg s1, s0
1327; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
1328; NONEON-NOSVE-NEXT:    fcvt h0, s0
1329; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
1330; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
1331; NONEON-NOSVE-NEXT:    str q0, [x0]
1332; NONEON-NOSVE-NEXT:    add sp, sp, #64
1333; NONEON-NOSVE-NEXT:    ret
1334  %a = load <8 x half>, ptr %ap
1335  %b = load <8 x float>, ptr %bp
1336  %tmp0 = fptrunc <8 x float> %b to <8 x half>
1337  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
1338  store <8 x half> %r, ptr %ap
1339  ret void
1340}
1341
; Declarations of the llvm.copysign overloads, grouped by element type
; (half, float, double).
; The v16f16 and v8f32 overloads are not called in the part of the file shown
; alongside these declarations -- presumably they serve tests earlier in the
; file; verify before removing.
; NOTE(review): every declaration references attribute group #0, whose
; definition is not visible here -- confirm it is defined, or intentionally
; left undefined, elsewhere in the file.
1342declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0
1343declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
1344declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
1345
1346declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
1347declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
1348declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
1349
1350declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
1351declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
1351declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
1352