xref: /llvm-project/llvm/test/CodeGen/AArch64/fsincos.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
; Scalar double sine. The body is just the @llvm.sin.f64 call followed by a
; return of its result. Both instruction selectors share one set of checks and
; are expected to emit nothing but a tail-call branch to the `sin` symbol.
5define double @sin_f64(double %a) {
6; CHECK-LABEL: sin_f64:
7; CHECK:       // %bb.0: // %entry
8; CHECK-NEXT:    b sin
9entry:
10  %c = call double @llvm.sin.f64(double %a)
11  ret double %c
12}
13
; Scalar float sine. Same shape as the double case, but the expected tail-call
; target is the single-precision `sinf` symbol. Both selectors share the checks.
14define float @sin_f32(float %a) {
15; CHECK-LABEL: sin_f32:
16; CHECK:       // %bb.0: // %entry
17; CHECK-NEXT:    b sinf
18entry:
19  %c = call float @llvm.sin.f32(float %a)
20  ret float %c
21}
22
; Scalar half sine. The expected code widens the argument to single precision
; (fcvt s0, h0), calls `sinf`, and narrows the result back (fcvt h0, s0).
; Because work follows the call, it is a regular `bl` with x30 saved to and
; restored from the stack rather than a tail-call branch. Both selectors share
; the checks.
23define half @sin_f16(half %a) {
24; CHECK-LABEL: sin_f16:
25; CHECK:       // %bb.0: // %entry
26; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
27; CHECK-NEXT:    .cfi_def_cfa_offset 16
28; CHECK-NEXT:    .cfi_offset w30, -16
29; CHECK-NEXT:    fcvt s0, h0
30; CHECK-NEXT:    bl sinf
31; CHECK-NEXT:    fcvt h0, s0
32; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
33; CHECK-NEXT:    ret
34entry:
35  %c = call half @llvm.sin.f16(half %a)
36  ret half %c
37}
38
; Scalar fp128 sine. The expected code is a single tail-call branch to the
; `sinl` symbol, shared by both selectors.
39define fp128 @sin_fp128(fp128 %a) {
40; CHECK-LABEL: sin_fp128:
41; CHECK:       // %bb.0: // %entry
42; CHECK-NEXT:    b sinl
43entry:
44  %c = call fp128 @llvm.sin.fp128(fp128 %a)
45  ret fp128 %c
46}
47
; Single-element double vector sine. Unlike sin_f64, the expected code is a
; regular `bl sin` with x30 saved and restored around it, not a tail-call
; branch. This function has no `entry` label, so the expected basic-block
; comment is `// %bb.0:` without a block-name suffix. Both selectors share the
; checks.
48define <1 x double> @sin_v1f64(<1 x double> %x) {
49; CHECK-LABEL: sin_v1f64:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
52; CHECK-NEXT:    .cfi_def_cfa_offset 16
53; CHECK-NEXT:    .cfi_offset w30, -16
54; CHECK-NEXT:    bl sin
55; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
56; CHECK-NEXT:    ret
57  %c = call <1 x double> @llvm.sin.v1f64(<1 x double> %x)
58  ret <1 x double> %c
59}
60
; Two-lane double vector sine. The expected code is scalarized: two `bl sin`
; calls, one per lane, with the results merged back into q0 by a lane move.
; The two selectors differ in how the pending lane survives the first call:
;  - SelectionDAG spills the whole input q0 to the stack, handles lane 1
;    first, then reloads q0 for lane 0.
;  - GlobalISel handles lane 0 first and parks lane 1 in callee-saved d8
;    (which is itself saved/restored in the prologue/epilogue).
61define <2 x double> @sin_v2f64(<2 x double> %a) {
62; CHECK-SD-LABEL: sin_v2f64:
63; CHECK-SD:       // %bb.0: // %entry
64; CHECK-SD-NEXT:    sub sp, sp, #48
65; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
66; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
67; CHECK-SD-NEXT:    .cfi_offset w30, -16
68; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
69; CHECK-SD-NEXT:    mov d0, v0.d[1]
70; CHECK-SD-NEXT:    bl sin
71; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
72; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
73; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
74; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
75; CHECK-SD-NEXT:    bl sin
76; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
77; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
78; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
79; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
80; CHECK-SD-NEXT:    add sp, sp, #48
81; CHECK-SD-NEXT:    ret
82;
83; CHECK-GI-LABEL: sin_v2f64:
84; CHECK-GI:       // %bb.0: // %entry
85; CHECK-GI-NEXT:    sub sp, sp, #32
86; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
87; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
88; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
89; CHECK-GI-NEXT:    .cfi_offset w30, -8
90; CHECK-GI-NEXT:    .cfi_offset b8, -16
91; CHECK-GI-NEXT:    mov d8, v0.d[1]
92; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
93; CHECK-GI-NEXT:    bl sin
94; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
95; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
96; CHECK-GI-NEXT:    fmov d0, d8
97; CHECK-GI-NEXT:    bl sin
98; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
99; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
100; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
101; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
102; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
103; CHECK-GI-NEXT:    mov v0.16b, v1.16b
104; CHECK-GI-NEXT:    add sp, sp, #32
105; CHECK-GI-NEXT:    ret
106entry:
107  %c = call <2 x double> @llvm.sin.v2f64(<2 x double> %a)
108  ret <2 x double> %c
109}
110
; Three-lane double vector sine. In the expected code the three lanes arrive
; in d0, d1 and d2 and the results are returned in d0, d1 and d2, with one
; `bl sin` per lane. Both selectors keep pending inputs and finished results
; in callee-saved d8-d10 across the calls; the two sequences differ only in
; which of d8/d9 holds lane 1 versus lane 2.
111define <3 x double> @sin_v3f64(<3 x double> %a) {
112; CHECK-SD-LABEL: sin_v3f64:
113; CHECK-SD:       // %bb.0: // %entry
114; CHECK-SD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
115; CHECK-SD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
116; CHECK-SD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
117; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
118; CHECK-SD-NEXT:    .cfi_offset w30, -8
119; CHECK-SD-NEXT:    .cfi_offset b8, -16
120; CHECK-SD-NEXT:    .cfi_offset b9, -24
121; CHECK-SD-NEXT:    .cfi_offset b10, -32
122; CHECK-SD-NEXT:    fmov d8, d2
123; CHECK-SD-NEXT:    fmov d9, d1
124; CHECK-SD-NEXT:    bl sin
125; CHECK-SD-NEXT:    fmov d10, d0
126; CHECK-SD-NEXT:    fmov d0, d9
127; CHECK-SD-NEXT:    bl sin
128; CHECK-SD-NEXT:    fmov d9, d0
129; CHECK-SD-NEXT:    fmov d0, d8
130; CHECK-SD-NEXT:    bl sin
131; CHECK-SD-NEXT:    fmov d1, d9
132; CHECK-SD-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
133; CHECK-SD-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
134; CHECK-SD-NEXT:    fmov d2, d0
135; CHECK-SD-NEXT:    fmov d0, d10
136; CHECK-SD-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
137; CHECK-SD-NEXT:    ret
138;
139; CHECK-GI-LABEL: sin_v3f64:
140; CHECK-GI:       // %bb.0: // %entry
141; CHECK-GI-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
142; CHECK-GI-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
143; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
144; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
145; CHECK-GI-NEXT:    .cfi_offset w30, -8
146; CHECK-GI-NEXT:    .cfi_offset b8, -16
147; CHECK-GI-NEXT:    .cfi_offset b9, -24
148; CHECK-GI-NEXT:    .cfi_offset b10, -32
149; CHECK-GI-NEXT:    fmov d8, d1
150; CHECK-GI-NEXT:    fmov d9, d2
151; CHECK-GI-NEXT:    bl sin
152; CHECK-GI-NEXT:    fmov d10, d0
153; CHECK-GI-NEXT:    fmov d0, d8
154; CHECK-GI-NEXT:    bl sin
155; CHECK-GI-NEXT:    fmov d8, d0
156; CHECK-GI-NEXT:    fmov d0, d9
157; CHECK-GI-NEXT:    bl sin
158; CHECK-GI-NEXT:    fmov d1, d8
159; CHECK-GI-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
160; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
161; CHECK-GI-NEXT:    fmov d2, d0
162; CHECK-GI-NEXT:    fmov d0, d10
163; CHECK-GI-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
164; CHECK-GI-NEXT:    ret
165entry:
166  %c = call <3 x double> @llvm.sin.v3f64(<3 x double> %a)
167  ret <3 x double> %c
168}
169
; Four-lane double vector sine. The input occupies q0 and q1 and the result is
; returned in q0 and q1; the expected code issues four `bl sin` calls, one per
; lane, and rebuilds each 128-bit half with a lane move.
;  - SelectionDAG keeps both inputs and all partial results in stack slots.
;  - GlobalISel extracts the two high lanes into callee-saved d8/d9 up front
;    and spills only q1 plus the per-lane results.
170define <4 x double> @sin_v4f64(<4 x double> %a) {
171; CHECK-SD-LABEL: sin_v4f64:
172; CHECK-SD:       // %bb.0: // %entry
173; CHECK-SD-NEXT:    sub sp, sp, #64
174; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
175; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
176; CHECK-SD-NEXT:    .cfi_offset w30, -16
177; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
178; CHECK-SD-NEXT:    mov d0, v0.d[1]
179; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
180; CHECK-SD-NEXT:    bl sin
181; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
182; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
183; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
184; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
185; CHECK-SD-NEXT:    bl sin
186; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
187; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
188; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
189; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
190; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
191; CHECK-SD-NEXT:    mov d0, v0.d[1]
192; CHECK-SD-NEXT:    bl sin
193; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
194; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
195; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
196; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
197; CHECK-SD-NEXT:    bl sin
198; CHECK-SD-NEXT:    fmov d1, d0
199; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
200; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
201; CHECK-SD-NEXT:    mov v1.d[1], v2.d[0]
202; CHECK-SD-NEXT:    add sp, sp, #64
203; CHECK-SD-NEXT:    ret
204;
205; CHECK-GI-LABEL: sin_v4f64:
206; CHECK-GI:       // %bb.0: // %entry
207; CHECK-GI-NEXT:    sub sp, sp, #80
208; CHECK-GI-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
209; CHECK-GI-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
210; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
211; CHECK-GI-NEXT:    .cfi_offset w30, -16
212; CHECK-GI-NEXT:    .cfi_offset b8, -24
213; CHECK-GI-NEXT:    .cfi_offset b9, -32
214; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
215; CHECK-GI-NEXT:    mov d8, v0.d[1]
216; CHECK-GI-NEXT:    mov d9, v1.d[1]
217; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
218; CHECK-GI-NEXT:    bl sin
219; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
220; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
221; CHECK-GI-NEXT:    fmov d0, d8
222; CHECK-GI-NEXT:    bl sin
223; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
224; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
225; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
226; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
227; CHECK-GI-NEXT:    bl sin
228; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
229; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
230; CHECK-GI-NEXT:    fmov d0, d9
231; CHECK-GI-NEXT:    bl sin
232; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
233; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
234; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
235; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
236; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
237; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
238; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
239; CHECK-GI-NEXT:    mov v0.16b, v2.16b
240; CHECK-GI-NEXT:    add sp, sp, #80
241; CHECK-GI-NEXT:    ret
242entry:
243  %c = call <4 x double> @llvm.sin.v4f64(<4 x double> %a)
244  ret <4 x double> %c
245}
246
; Two-lane float vector sine. The 64-bit vector is passed and returned in d0;
; the expected code issues two `bl sinf` calls, one per lane, and merges the
; results with a 32-bit lane move.
;  - SelectionDAG spills the input to the stack and processes lane 1 first.
;  - GlobalISel processes lane 0 first and holds lane 1 in callee-saved s8/d8.
247define <2 x float> @sin_v2f32(<2 x float> %a) {
248; CHECK-SD-LABEL: sin_v2f32:
249; CHECK-SD:       // %bb.0: // %entry
250; CHECK-SD-NEXT:    sub sp, sp, #48
251; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
252; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
253; CHECK-SD-NEXT:    .cfi_offset w30, -16
254; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
255; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
256; CHECK-SD-NEXT:    mov s0, v0.s[1]
257; CHECK-SD-NEXT:    bl sinf
258; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
259; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
260; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
261; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
262; CHECK-SD-NEXT:    bl sinf
263; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
264; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
265; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
266; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
267; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
268; CHECK-SD-NEXT:    add sp, sp, #48
269; CHECK-SD-NEXT:    ret
270;
271; CHECK-GI-LABEL: sin_v2f32:
272; CHECK-GI:       // %bb.0: // %entry
273; CHECK-GI-NEXT:    sub sp, sp, #32
274; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
275; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
276; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
277; CHECK-GI-NEXT:    .cfi_offset w30, -8
278; CHECK-GI-NEXT:    .cfi_offset b8, -16
279; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
280; CHECK-GI-NEXT:    mov s8, v0.s[1]
281; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
282; CHECK-GI-NEXT:    bl sinf
283; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
284; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
285; CHECK-GI-NEXT:    fmov s0, s8
286; CHECK-GI-NEXT:    bl sinf
287; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
288; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
289; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
290; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
291; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
292; CHECK-GI-NEXT:    fmov d0, d1
293; CHECK-GI-NEXT:    add sp, sp, #32
294; CHECK-GI-NEXT:    ret
295entry:
296  %c = call <2 x float> @llvm.sin.v2f32(<2 x float> %a)
297  ret <2 x float> %c
298}
299
; Three-lane float vector sine. The vector travels in q0; the expected code
; reads only lanes 0, 1 and 2 and issues exactly three `bl sinf` calls, then
; inserts the results back lane by lane.
;  - SelectionDAG reloads the spilled input from the stack before each
;    extract.
;  - GlobalISel extracts lanes 1 and 2 into callee-saved s8/s9 before the
;    first call and spills only the per-lane results.
300define <3 x float> @sin_v3f32(<3 x float> %a) {
301; CHECK-SD-LABEL: sin_v3f32:
302; CHECK-SD:       // %bb.0: // %entry
303; CHECK-SD-NEXT:    sub sp, sp, #48
304; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
305; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
306; CHECK-SD-NEXT:    .cfi_offset w30, -16
307; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
308; CHECK-SD-NEXT:    mov s0, v0.s[1]
309; CHECK-SD-NEXT:    bl sinf
310; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
311; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
312; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
313; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
314; CHECK-SD-NEXT:    bl sinf
315; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
316; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
317; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
318; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
319; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
320; CHECK-SD-NEXT:    mov s0, v0.s[2]
321; CHECK-SD-NEXT:    bl sinf
322; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
323; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
324; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
325; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
326; CHECK-SD-NEXT:    mov v0.16b, v1.16b
327; CHECK-SD-NEXT:    add sp, sp, #48
328; CHECK-SD-NEXT:    ret
329;
330; CHECK-GI-LABEL: sin_v3f32:
331; CHECK-GI:       // %bb.0: // %entry
332; CHECK-GI-NEXT:    sub sp, sp, #64
333; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
334; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
335; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
336; CHECK-GI-NEXT:    .cfi_offset w30, -16
337; CHECK-GI-NEXT:    .cfi_offset b8, -24
338; CHECK-GI-NEXT:    .cfi_offset b9, -32
339; CHECK-GI-NEXT:    mov s8, v0.s[1]
340; CHECK-GI-NEXT:    mov s9, v0.s[2]
341; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
342; CHECK-GI-NEXT:    bl sinf
343; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
344; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
345; CHECK-GI-NEXT:    fmov s0, s8
346; CHECK-GI-NEXT:    bl sinf
347; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
348; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
349; CHECK-GI-NEXT:    fmov s0, s9
350; CHECK-GI-NEXT:    bl sinf
351; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
352; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
353; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
354; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
355; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
356; CHECK-GI-NEXT:    mov v1.s[2], v0.s[0]
357; CHECK-GI-NEXT:    mov v0.16b, v1.16b
358; CHECK-GI-NEXT:    add sp, sp, #64
359; CHECK-GI-NEXT:    ret
360entry:
361  %c = call <3 x float> @llvm.sin.v3f32(<3 x float> %a)
362  ret <3 x float> %c
363}
364
; Four-lane float vector sine. The vector travels in q0; the expected code
; issues four `bl sinf` calls, one per lane, and reassembles the result with
; 32-bit lane moves.
;  - SelectionDAG uses a 48-byte frame and reloads the spilled input before
;    each lane extract.
;  - GlobalISel uses an 80-byte frame, extracts lanes 1-3 into callee-saved
;    s8-s10 up front, and spills each per-lane result.
365define <4 x float> @sin_v4f32(<4 x float> %a) {
366; CHECK-SD-LABEL: sin_v4f32:
367; CHECK-SD:       // %bb.0: // %entry
368; CHECK-SD-NEXT:    sub sp, sp, #48
369; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
370; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
371; CHECK-SD-NEXT:    .cfi_offset w30, -16
372; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
373; CHECK-SD-NEXT:    mov s0, v0.s[1]
374; CHECK-SD-NEXT:    bl sinf
375; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
376; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
377; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
378; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
379; CHECK-SD-NEXT:    bl sinf
380; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
381; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
382; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
383; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
384; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
385; CHECK-SD-NEXT:    mov s0, v0.s[2]
386; CHECK-SD-NEXT:    bl sinf
387; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
388; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
389; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
390; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
391; CHECK-SD-NEXT:    mov s0, v0.s[3]
392; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
393; CHECK-SD-NEXT:    bl sinf
394; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
395; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
396; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
397; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
398; CHECK-SD-NEXT:    mov v0.16b, v1.16b
399; CHECK-SD-NEXT:    add sp, sp, #48
400; CHECK-SD-NEXT:    ret
401;
402; CHECK-GI-LABEL: sin_v4f32:
403; CHECK-GI:       // %bb.0: // %entry
404; CHECK-GI-NEXT:    sub sp, sp, #80
405; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
406; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
407; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
408; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
409; CHECK-GI-NEXT:    .cfi_offset w30, -8
410; CHECK-GI-NEXT:    .cfi_offset b8, -16
411; CHECK-GI-NEXT:    .cfi_offset b9, -24
412; CHECK-GI-NEXT:    .cfi_offset b10, -32
413; CHECK-GI-NEXT:    mov s8, v0.s[1]
414; CHECK-GI-NEXT:    mov s9, v0.s[2]
415; CHECK-GI-NEXT:    mov s10, v0.s[3]
416; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
417; CHECK-GI-NEXT:    bl sinf
418; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
419; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
420; CHECK-GI-NEXT:    fmov s0, s8
421; CHECK-GI-NEXT:    bl sinf
422; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
423; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
424; CHECK-GI-NEXT:    fmov s0, s9
425; CHECK-GI-NEXT:    bl sinf
426; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
427; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
428; CHECK-GI-NEXT:    fmov s0, s10
429; CHECK-GI-NEXT:    bl sinf
430; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
431; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
432; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
433; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
434; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
435; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
436; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
437; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
438; CHECK-GI-NEXT:    mov v1.s[3], v0.s[0]
439; CHECK-GI-NEXT:    mov v0.16b, v1.16b
440; CHECK-GI-NEXT:    add sp, sp, #80
441; CHECK-GI-NEXT:    ret
442entry:
443  %c = call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
444  ret <4 x float> %c
445}
446
; Eight-lane float vector sine. The input occupies q0 and q1 and the result is
; returned in q0 and q1; the expected code issues eight `bl sinf` calls, one
; per lane.
;  - SelectionDAG uses a 64-byte frame: both input halves are stored with one
;    stp, and each half is rebuilt in place through stack slots.
;  - GlobalISel uses a 176-byte frame: lanes 1-3 of each half are extracted
;    into callee-saved s8-s13 up front, every per-lane result is spilled, and
;    the two result vectors are assembled after the last call.
447define <8 x float> @sin_v8f32(<8 x float> %a) {
448; CHECK-SD-LABEL: sin_v8f32:
449; CHECK-SD:       // %bb.0: // %entry
450; CHECK-SD-NEXT:    sub sp, sp, #64
451; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
452; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
453; CHECK-SD-NEXT:    .cfi_offset w30, -16
454; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
455; CHECK-SD-NEXT:    mov s0, v0.s[1]
456; CHECK-SD-NEXT:    bl sinf
457; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
458; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
459; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
460; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
461; CHECK-SD-NEXT:    bl sinf
462; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
463; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
464; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
465; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
466; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
467; CHECK-SD-NEXT:    mov s0, v0.s[2]
468; CHECK-SD-NEXT:    bl sinf
469; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
470; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
471; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
472; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
473; CHECK-SD-NEXT:    mov s0, v0.s[3]
474; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
475; CHECK-SD-NEXT:    bl sinf
476; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
477; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
478; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
479; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
480; CHECK-SD-NEXT:    mov s0, v0.s[1]
481; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
482; CHECK-SD-NEXT:    bl sinf
483; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
484; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
485; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
486; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
487; CHECK-SD-NEXT:    bl sinf
488; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
489; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
490; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
491; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
492; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
493; CHECK-SD-NEXT:    mov s0, v0.s[2]
494; CHECK-SD-NEXT:    bl sinf
495; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
496; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
497; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
498; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
499; CHECK-SD-NEXT:    mov s0, v0.s[3]
500; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
501; CHECK-SD-NEXT:    bl sinf
502; CHECK-SD-NEXT:    fmov s2, s0
503; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
504; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
505; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
506; CHECK-SD-NEXT:    mov v1.s[3], v2.s[0]
507; CHECK-SD-NEXT:    add sp, sp, #64
508; CHECK-SD-NEXT:    ret
509;
510; CHECK-GI-LABEL: sin_v8f32:
511; CHECK-GI:       // %bb.0: // %entry
512; CHECK-GI-NEXT:    sub sp, sp, #176
513; CHECK-GI-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
514; CHECK-GI-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
515; CHECK-GI-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
516; CHECK-GI-NEXT:    str x30, [sp, #160] // 8-byte Folded Spill
517; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
518; CHECK-GI-NEXT:    .cfi_offset w30, -16
519; CHECK-GI-NEXT:    .cfi_offset b8, -24
520; CHECK-GI-NEXT:    .cfi_offset b9, -32
521; CHECK-GI-NEXT:    .cfi_offset b10, -40
522; CHECK-GI-NEXT:    .cfi_offset b11, -48
523; CHECK-GI-NEXT:    .cfi_offset b12, -56
524; CHECK-GI-NEXT:    .cfi_offset b13, -64
525; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
526; CHECK-GI-NEXT:    mov s8, v0.s[1]
527; CHECK-GI-NEXT:    mov s9, v0.s[2]
528; CHECK-GI-NEXT:    mov s10, v0.s[3]
529; CHECK-GI-NEXT:    mov s11, v1.s[1]
530; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
531; CHECK-GI-NEXT:    mov s12, v1.s[2]
532; CHECK-GI-NEXT:    mov s13, v1.s[3]
533; CHECK-GI-NEXT:    bl sinf
534; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
535; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
536; CHECK-GI-NEXT:    fmov s0, s8
537; CHECK-GI-NEXT:    bl sinf
538; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
539; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
540; CHECK-GI-NEXT:    fmov s0, s9
541; CHECK-GI-NEXT:    bl sinf
542; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
543; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
544; CHECK-GI-NEXT:    fmov s0, s10
545; CHECK-GI-NEXT:    bl sinf
546; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
547; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
548; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
549; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
550; CHECK-GI-NEXT:    bl sinf
551; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
552; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
553; CHECK-GI-NEXT:    fmov s0, s11
554; CHECK-GI-NEXT:    bl sinf
555; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
556; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
557; CHECK-GI-NEXT:    fmov s0, s12
558; CHECK-GI-NEXT:    bl sinf
559; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
560; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
561; CHECK-GI-NEXT:    fmov s0, s13
562; CHECK-GI-NEXT:    bl sinf
563; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
564; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
565; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
566; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
567; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
568; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
569; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
570; CHECK-GI-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
571; CHECK-GI-NEXT:    mov v3.s[1], v2.s[0]
572; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
573; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
574; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
575; CHECK-GI-NEXT:    mov v3.s[2], v2.s[0]
576; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
577; CHECK-GI-NEXT:    mov v1.s[3], v2.s[0]
578; CHECK-GI-NEXT:    mov v3.s[3], v0.s[0]
579; CHECK-GI-NEXT:    mov v2.16b, v1.16b
580; CHECK-GI-NEXT:    mov v1.16b, v3.16b
581; CHECK-GI-NEXT:    mov v0.16b, v2.16b
582; CHECK-GI-NEXT:    add sp, sp, #176
583; CHECK-GI-NEXT:    ret
584entry:
585  %c = call <8 x float> @llvm.sin.v8f32(<8 x float> %a)
586  ret <8 x float> %c
587}
588
; Seven-lane half vector sine (a non-power-of-two element count). Each lane is
; widened with fcvt to single precision, passed to `sinf`, and narrowed back
; with fcvt before being inserted into the result in q0.
; The two selectors differ in how many lanes they process:
;  - SelectionDAG's expected output handles all eight lanes of the register
;    (h[0] through h[7]), i.e. eight `bl sinf` calls.
;  - GlobalISel's expected output handles only lanes h[0] through h[6], i.e.
;    seven calls, holding lanes 1-6 in callee-saved h8-h13 across the calls.
589define <7 x half> @sin_v7f16(<7 x half> %a) {
590; CHECK-SD-LABEL: sin_v7f16:
591; CHECK-SD:       // %bb.0: // %entry
592; CHECK-SD-NEXT:    sub sp, sp, #48
593; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
594; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
595; CHECK-SD-NEXT:    .cfi_offset w30, -16
596; CHECK-SD-NEXT:    mov h1, v0.h[1]
597; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
598; CHECK-SD-NEXT:    fcvt s0, h1
599; CHECK-SD-NEXT:    bl sinf
600; CHECK-SD-NEXT:    fcvt h0, s0
601; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
602; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
603; CHECK-SD-NEXT:    fcvt s0, h0
604; CHECK-SD-NEXT:    bl sinf
605; CHECK-SD-NEXT:    fcvt h0, s0
606; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
607; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
608; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
609; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
610; CHECK-SD-NEXT:    mov h0, v0.h[2]
611; CHECK-SD-NEXT:    fcvt s0, h0
612; CHECK-SD-NEXT:    bl sinf
613; CHECK-SD-NEXT:    fcvt h0, s0
614; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
615; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
616; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
617; CHECK-SD-NEXT:    mov h0, v0.h[3]
618; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
619; CHECK-SD-NEXT:    fcvt s0, h0
620; CHECK-SD-NEXT:    bl sinf
621; CHECK-SD-NEXT:    fcvt h0, s0
622; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
623; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
624; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
625; CHECK-SD-NEXT:    mov h0, v0.h[4]
626; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
627; CHECK-SD-NEXT:    fcvt s0, h0
628; CHECK-SD-NEXT:    bl sinf
629; CHECK-SD-NEXT:    fcvt h0, s0
630; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
631; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
632; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
633; CHECK-SD-NEXT:    mov h0, v0.h[5]
634; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
635; CHECK-SD-NEXT:    fcvt s0, h0
636; CHECK-SD-NEXT:    bl sinf
637; CHECK-SD-NEXT:    fcvt h0, s0
638; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
639; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
640; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
641; CHECK-SD-NEXT:    mov h0, v0.h[6]
642; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
643; CHECK-SD-NEXT:    fcvt s0, h0
644; CHECK-SD-NEXT:    bl sinf
645; CHECK-SD-NEXT:    fcvt h0, s0
646; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
647; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
648; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
649; CHECK-SD-NEXT:    mov h0, v0.h[7]
650; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
651; CHECK-SD-NEXT:    fcvt s0, h0
652; CHECK-SD-NEXT:    bl sinf
653; CHECK-SD-NEXT:    fcvt h1, s0
654; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
655; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
656; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
657; CHECK-SD-NEXT:    add sp, sp, #48
658; CHECK-SD-NEXT:    ret
659;
660; CHECK-GI-LABEL: sin_v7f16:
661; CHECK-GI:       // %bb.0: // %entry
662; CHECK-GI-NEXT:    sub sp, sp, #160
663; CHECK-GI-NEXT:    stp d13, d12, [sp, #96] // 16-byte Folded Spill
664; CHECK-GI-NEXT:    stp d11, d10, [sp, #112] // 16-byte Folded Spill
665; CHECK-GI-NEXT:    stp d9, d8, [sp, #128] // 16-byte Folded Spill
666; CHECK-GI-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
667; CHECK-GI-NEXT:    .cfi_def_cfa_offset 160
668; CHECK-GI-NEXT:    .cfi_offset w30, -16
669; CHECK-GI-NEXT:    .cfi_offset b8, -24
670; CHECK-GI-NEXT:    .cfi_offset b9, -32
671; CHECK-GI-NEXT:    .cfi_offset b10, -40
672; CHECK-GI-NEXT:    .cfi_offset b11, -48
673; CHECK-GI-NEXT:    .cfi_offset b12, -56
674; CHECK-GI-NEXT:    .cfi_offset b13, -64
675; CHECK-GI-NEXT:    mov h8, v0.h[1]
676; CHECK-GI-NEXT:    mov h9, v0.h[2]
677; CHECK-GI-NEXT:    mov h10, v0.h[3]
678; CHECK-GI-NEXT:    mov h11, v0.h[4]
679; CHECK-GI-NEXT:    mov h12, v0.h[5]
680; CHECK-GI-NEXT:    mov h13, v0.h[6]
681; CHECK-GI-NEXT:    fcvt s0, h0
682; CHECK-GI-NEXT:    bl sinf
683; CHECK-GI-NEXT:    fcvt s1, h8
684; CHECK-GI-NEXT:    fcvt h0, s0
685; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
686; CHECK-GI-NEXT:    fmov s0, s1
687; CHECK-GI-NEXT:    bl sinf
688; CHECK-GI-NEXT:    fcvt s1, h9
689; CHECK-GI-NEXT:    fcvt h0, s0
690; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
691; CHECK-GI-NEXT:    fmov s0, s1
692; CHECK-GI-NEXT:    bl sinf
693; CHECK-GI-NEXT:    fcvt s1, h10
694; CHECK-GI-NEXT:    fcvt h0, s0
695; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
696; CHECK-GI-NEXT:    fmov s0, s1
697; CHECK-GI-NEXT:    bl sinf
698; CHECK-GI-NEXT:    fcvt s1, h11
699; CHECK-GI-NEXT:    fcvt h0, s0
700; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
701; CHECK-GI-NEXT:    fmov s0, s1
702; CHECK-GI-NEXT:    bl sinf
703; CHECK-GI-NEXT:    fcvt s1, h12
704; CHECK-GI-NEXT:    fcvt h0, s0
705; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
706; CHECK-GI-NEXT:    fmov s0, s1
707; CHECK-GI-NEXT:    bl sinf
708; CHECK-GI-NEXT:    fcvt s1, h13
709; CHECK-GI-NEXT:    fcvt h0, s0
710; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
711; CHECK-GI-NEXT:    fmov s0, s1
712; CHECK-GI-NEXT:    bl sinf
713; CHECK-GI-NEXT:    ldp q3, q2, [sp, #48] // 32-byte Folded Reload
714; CHECK-GI-NEXT:    fcvt h0, s0
715; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
716; CHECK-GI-NEXT:    ldp d9, d8, [sp, #128] // 16-byte Folded Reload
717; CHECK-GI-NEXT:    ldp d11, d10, [sp, #112] // 16-byte Folded Reload
718; CHECK-GI-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
719; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
720; CHECK-GI-NEXT:    ldp d13, d12, [sp, #96] // 16-byte Folded Reload
721; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
722; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
723; CHECK-GI-NEXT:    mov v1.h[3], v3.h[0]
724; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
725; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
726; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
727; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
728; CHECK-GI-NEXT:    mov v0.16b, v1.16b
729; CHECK-GI-NEXT:    add sp, sp, #160
730; CHECK-GI-NEXT:    ret
731entry:
732  %c = call <7 x half> @llvm.sin.v7f16(<7 x half> %a)
733  ret <7 x half> %c
734}
735
; llvm.sin on <4 x half>. Both runs expect the operation to be scalarized:
; every lane is widened with fcvt (h -> s), passed to sinf via bl, and the
; result narrowed back with fcvt (s -> h), four calls in total.
;  * Default (non -global-isel) run: the input vector is spilled to
;    [sp, #16] and lanes are re-extracted from it after each call; the
;    partially built result lives in the [sp] slot. Lane 1 is handled first,
;    then lanes 0, 2 and 3.
;  * -global-isel run: lanes 1..3 are extracted up front into h8..h10 (so
;    d8..d10 are saved/restored), each result is spilled to its own 16-byte
;    slot, and the vector is assembled after the last call.
736define <4 x half> @sin_v4f16(<4 x half> %a) {
737; CHECK-SD-LABEL: sin_v4f16:
738; CHECK-SD:       // %bb.0: // %entry
739; CHECK-SD-NEXT:    sub sp, sp, #48
740; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
741; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
742; CHECK-SD-NEXT:    .cfi_offset w30, -16
743; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
744; CHECK-SD-NEXT:    mov h1, v0.h[1]
745; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
746; CHECK-SD-NEXT:    fcvt s0, h1
747; CHECK-SD-NEXT:    bl sinf
748; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
749; CHECK-SD-NEXT:    fcvt h0, s0
750; CHECK-SD-NEXT:    fcvt s1, h1
751; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
752; CHECK-SD-NEXT:    fmov s0, s1
753; CHECK-SD-NEXT:    bl sinf
754; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
755; CHECK-SD-NEXT:    fcvt h2, s0
756; CHECK-SD-NEXT:    mov h1, v1.h[2]
757; CHECK-SD-NEXT:    fcvt s0, h1
758; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
759; CHECK-SD-NEXT:    mov v2.h[1], v1.h[0]
760; CHECK-SD-NEXT:    str q2, [sp] // 16-byte Folded Spill
761; CHECK-SD-NEXT:    bl sinf
762; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
763; CHECK-SD-NEXT:    fcvt h2, s0
764; CHECK-SD-NEXT:    mov h1, v1.h[3]
765; CHECK-SD-NEXT:    fcvt s0, h1
766; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
767; CHECK-SD-NEXT:    mov v1.h[2], v2.h[0]
768; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
769; CHECK-SD-NEXT:    bl sinf
770; CHECK-SD-NEXT:    fcvt h1, s0
771; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
772; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
773; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
774; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
775; CHECK-SD-NEXT:    add sp, sp, #48
776; CHECK-SD-NEXT:    ret
777;
778; CHECK-GI-LABEL: sin_v4f16:
779; CHECK-GI:       // %bb.0: // %entry
780; CHECK-GI-NEXT:    sub sp, sp, #80
781; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
782; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
783; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
784; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
785; CHECK-GI-NEXT:    .cfi_offset w30, -8
786; CHECK-GI-NEXT:    .cfi_offset b8, -16
787; CHECK-GI-NEXT:    .cfi_offset b9, -24
788; CHECK-GI-NEXT:    .cfi_offset b10, -32
789; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
790; CHECK-GI-NEXT:    mov h8, v0.h[1]
791; CHECK-GI-NEXT:    mov h9, v0.h[2]
792; CHECK-GI-NEXT:    mov h10, v0.h[3]
793; CHECK-GI-NEXT:    fcvt s0, h0
794; CHECK-GI-NEXT:    bl sinf
795; CHECK-GI-NEXT:    fcvt s1, h8
796; CHECK-GI-NEXT:    fcvt h0, s0
797; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
798; CHECK-GI-NEXT:    fmov s0, s1
799; CHECK-GI-NEXT:    bl sinf
800; CHECK-GI-NEXT:    fcvt s1, h9
801; CHECK-GI-NEXT:    fcvt h0, s0
802; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
803; CHECK-GI-NEXT:    fmov s0, s1
804; CHECK-GI-NEXT:    bl sinf
805; CHECK-GI-NEXT:    fcvt s1, h10
806; CHECK-GI-NEXT:    fcvt h0, s0
807; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
808; CHECK-GI-NEXT:    fmov s0, s1
809; CHECK-GI-NEXT:    bl sinf
810; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
811; CHECK-GI-NEXT:    fcvt h0, s0
812; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
813; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
814; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
815; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
816; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
817; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
818; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
819; CHECK-GI-NEXT:    mov v0.16b, v1.16b
820; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
821; CHECK-GI-NEXT:    add sp, sp, #80
822; CHECK-GI-NEXT:    ret
823entry:
824  %c = call <4 x half> @llvm.sin.v4f16(<4 x half> %a)
825  ret <4 x half> %c
826}
827
; llvm.sin on <8 x half>. Both runs expect eight scalar sinf calls, each
; wrapped in an fcvt h -> s before the call and an fcvt s -> h after it.
;  * Default (non -global-isel) run: 48-byte frame; the input vector stays
;    in the [sp] slot and the growing result vector in [sp, #16], both being
;    reloaded/re-spilled around every call. Lane 1 is processed before lane 0.
;  * -global-isel run: 176-byte frame; lanes 1..7 are copied into h8..h14
;    before the first call (hence the d8..d14 saves), each result gets its
;    own 16-byte spill slot, and the lanes are inserted after the final call.
828define <8 x half> @sin_v8f16(<8 x half> %a) {
829; CHECK-SD-LABEL: sin_v8f16:
830; CHECK-SD:       // %bb.0: // %entry
831; CHECK-SD-NEXT:    sub sp, sp, #48
832; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
833; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
834; CHECK-SD-NEXT:    .cfi_offset w30, -16
835; CHECK-SD-NEXT:    mov h1, v0.h[1]
836; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
837; CHECK-SD-NEXT:    fcvt s0, h1
838; CHECK-SD-NEXT:    bl sinf
839; CHECK-SD-NEXT:    fcvt h0, s0
840; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
841; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
842; CHECK-SD-NEXT:    fcvt s0, h0
843; CHECK-SD-NEXT:    bl sinf
844; CHECK-SD-NEXT:    fcvt h0, s0
845; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
846; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
847; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
848; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
849; CHECK-SD-NEXT:    mov h0, v0.h[2]
850; CHECK-SD-NEXT:    fcvt s0, h0
851; CHECK-SD-NEXT:    bl sinf
852; CHECK-SD-NEXT:    fcvt h0, s0
853; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
854; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
855; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
856; CHECK-SD-NEXT:    mov h0, v0.h[3]
857; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
858; CHECK-SD-NEXT:    fcvt s0, h0
859; CHECK-SD-NEXT:    bl sinf
860; CHECK-SD-NEXT:    fcvt h0, s0
861; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
862; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
863; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
864; CHECK-SD-NEXT:    mov h0, v0.h[4]
865; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
866; CHECK-SD-NEXT:    fcvt s0, h0
867; CHECK-SD-NEXT:    bl sinf
868; CHECK-SD-NEXT:    fcvt h0, s0
869; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
870; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
871; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
872; CHECK-SD-NEXT:    mov h0, v0.h[5]
873; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
874; CHECK-SD-NEXT:    fcvt s0, h0
875; CHECK-SD-NEXT:    bl sinf
876; CHECK-SD-NEXT:    fcvt h0, s0
877; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
878; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
879; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
880; CHECK-SD-NEXT:    mov h0, v0.h[6]
881; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
882; CHECK-SD-NEXT:    fcvt s0, h0
883; CHECK-SD-NEXT:    bl sinf
884; CHECK-SD-NEXT:    fcvt h0, s0
885; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
886; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
887; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
888; CHECK-SD-NEXT:    mov h0, v0.h[7]
889; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
890; CHECK-SD-NEXT:    fcvt s0, h0
891; CHECK-SD-NEXT:    bl sinf
892; CHECK-SD-NEXT:    fcvt h1, s0
893; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
894; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
895; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
896; CHECK-SD-NEXT:    add sp, sp, #48
897; CHECK-SD-NEXT:    ret
898;
899; CHECK-GI-LABEL: sin_v8f16:
900; CHECK-GI:       // %bb.0: // %entry
901; CHECK-GI-NEXT:    sub sp, sp, #176
902; CHECK-GI-NEXT:    str d14, [sp, #112] // 8-byte Folded Spill
903; CHECK-GI-NEXT:    stp d13, d12, [sp, #120] // 16-byte Folded Spill
904; CHECK-GI-NEXT:    stp d11, d10, [sp, #136] // 16-byte Folded Spill
905; CHECK-GI-NEXT:    stp d9, d8, [sp, #152] // 16-byte Folded Spill
906; CHECK-GI-NEXT:    str x30, [sp, #168] // 8-byte Folded Spill
907; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
908; CHECK-GI-NEXT:    .cfi_offset w30, -8
909; CHECK-GI-NEXT:    .cfi_offset b8, -16
910; CHECK-GI-NEXT:    .cfi_offset b9, -24
911; CHECK-GI-NEXT:    .cfi_offset b10, -32
912; CHECK-GI-NEXT:    .cfi_offset b11, -40
913; CHECK-GI-NEXT:    .cfi_offset b12, -48
914; CHECK-GI-NEXT:    .cfi_offset b13, -56
915; CHECK-GI-NEXT:    .cfi_offset b14, -64
916; CHECK-GI-NEXT:    mov h8, v0.h[1]
917; CHECK-GI-NEXT:    mov h9, v0.h[2]
918; CHECK-GI-NEXT:    mov h10, v0.h[3]
919; CHECK-GI-NEXT:    mov h11, v0.h[4]
920; CHECK-GI-NEXT:    mov h12, v0.h[5]
921; CHECK-GI-NEXT:    mov h13, v0.h[6]
922; CHECK-GI-NEXT:    mov h14, v0.h[7]
923; CHECK-GI-NEXT:    fcvt s0, h0
924; CHECK-GI-NEXT:    bl sinf
925; CHECK-GI-NEXT:    fcvt s1, h8
926; CHECK-GI-NEXT:    fcvt h0, s0
927; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
928; CHECK-GI-NEXT:    fmov s0, s1
929; CHECK-GI-NEXT:    bl sinf
930; CHECK-GI-NEXT:    fcvt s1, h9
931; CHECK-GI-NEXT:    fcvt h0, s0
932; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
933; CHECK-GI-NEXT:    fmov s0, s1
934; CHECK-GI-NEXT:    bl sinf
935; CHECK-GI-NEXT:    fcvt s1, h10
936; CHECK-GI-NEXT:    fcvt h0, s0
937; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
938; CHECK-GI-NEXT:    fmov s0, s1
939; CHECK-GI-NEXT:    bl sinf
940; CHECK-GI-NEXT:    fcvt s1, h11
941; CHECK-GI-NEXT:    fcvt h0, s0
942; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
943; CHECK-GI-NEXT:    fmov s0, s1
944; CHECK-GI-NEXT:    bl sinf
945; CHECK-GI-NEXT:    fcvt s1, h12
946; CHECK-GI-NEXT:    fcvt h0, s0
947; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
948; CHECK-GI-NEXT:    fmov s0, s1
949; CHECK-GI-NEXT:    bl sinf
950; CHECK-GI-NEXT:    fcvt s1, h13
951; CHECK-GI-NEXT:    fcvt h0, s0
952; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
953; CHECK-GI-NEXT:    fmov s0, s1
954; CHECK-GI-NEXT:    bl sinf
955; CHECK-GI-NEXT:    fcvt s1, h14
956; CHECK-GI-NEXT:    fcvt h0, s0
957; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
958; CHECK-GI-NEXT:    fmov s0, s1
959; CHECK-GI-NEXT:    bl sinf
960; CHECK-GI-NEXT:    ldp q3, q2, [sp, #64] // 32-byte Folded Reload
961; CHECK-GI-NEXT:    fcvt h0, s0
962; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
963; CHECK-GI-NEXT:    ldp d9, d8, [sp, #152] // 16-byte Folded Reload
964; CHECK-GI-NEXT:    ldp d11, d10, [sp, #136] // 16-byte Folded Reload
965; CHECK-GI-NEXT:    ldr x30, [sp, #168] // 8-byte Folded Reload
966; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
967; CHECK-GI-NEXT:    ldr d14, [sp, #112] // 8-byte Folded Reload
968; CHECK-GI-NEXT:    ldp d13, d12, [sp, #120] // 16-byte Folded Reload
969; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
970; CHECK-GI-NEXT:    ldp q2, q3, [sp, #32] // 32-byte Folded Reload
971; CHECK-GI-NEXT:    mov v1.h[3], v3.h[0]
972; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
973; CHECK-GI-NEXT:    ldp q2, q3, [sp] // 32-byte Folded Reload
974; CHECK-GI-NEXT:    mov v1.h[5], v3.h[0]
975; CHECK-GI-NEXT:    mov v1.h[6], v2.h[0]
976; CHECK-GI-NEXT:    mov v1.h[7], v0.h[0]
977; CHECK-GI-NEXT:    mov v0.16b, v1.16b
978; CHECK-GI-NEXT:    add sp, sp, #176
979; CHECK-GI-NEXT:    ret
980entry:
981  %c = call <8 x half> @llvm.sin.v8f16(<8 x half> %a)
982  ret <8 x half> %c
983}
984
; llvm.sin on <16 x half>, which arrives in two vector registers (q0, q1).
; Both runs expect sixteen scalar sinf calls with fcvt h -> s / s -> h
; conversions around each one.
;  * Default (non -global-isel) run: 64-byte frame. q1 and q0 are spilled
;    together to [sp] / [sp, #16]; the eight lanes of q0 are processed first
;    (result accumulated in [sp, #32]), then the eight lanes of q1 (result
;    accumulated in [sp, #16], reusing the slot of the now-dead q0 copy).
;  * -global-isel run: 320-byte frame. Lanes 1..7 of v0 go to h15 and
;    h8..h13, lane 1 of v1 goes to h14, and lanes 2..7 of v1 are stored as
;    2-byte stack spills; those 2-byte slots are later overwritten by the
;    16-byte result spills of the corresponding lanes. x29 is saved together
;    with x30 in this frame.
985define <16 x half> @sin_v16f16(<16 x half> %a) {
986; CHECK-SD-LABEL: sin_v16f16:
987; CHECK-SD:       // %bb.0: // %entry
988; CHECK-SD-NEXT:    sub sp, sp, #64
989; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
990; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
991; CHECK-SD-NEXT:    .cfi_offset w30, -16
992; CHECK-SD-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
993; CHECK-SD-NEXT:    mov h1, v0.h[1]
994; CHECK-SD-NEXT:    fcvt s0, h1
995; CHECK-SD-NEXT:    bl sinf
996; CHECK-SD-NEXT:    fcvt h0, s0
997; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
998; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
999; CHECK-SD-NEXT:    fcvt s0, h0
1000; CHECK-SD-NEXT:    bl sinf
1001; CHECK-SD-NEXT:    fcvt h0, s0
1002; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1003; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
1004; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1005; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1006; CHECK-SD-NEXT:    mov h0, v0.h[2]
1007; CHECK-SD-NEXT:    fcvt s0, h0
1008; CHECK-SD-NEXT:    bl sinf
1009; CHECK-SD-NEXT:    fcvt h0, s0
1010; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1011; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
1012; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1013; CHECK-SD-NEXT:    mov h0, v0.h[3]
1014; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1015; CHECK-SD-NEXT:    fcvt s0, h0
1016; CHECK-SD-NEXT:    bl sinf
1017; CHECK-SD-NEXT:    fcvt h0, s0
1018; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1019; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
1020; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1021; CHECK-SD-NEXT:    mov h0, v0.h[4]
1022; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1023; CHECK-SD-NEXT:    fcvt s0, h0
1024; CHECK-SD-NEXT:    bl sinf
1025; CHECK-SD-NEXT:    fcvt h0, s0
1026; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1027; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
1028; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1029; CHECK-SD-NEXT:    mov h0, v0.h[5]
1030; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1031; CHECK-SD-NEXT:    fcvt s0, h0
1032; CHECK-SD-NEXT:    bl sinf
1033; CHECK-SD-NEXT:    fcvt h0, s0
1034; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1035; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
1036; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1037; CHECK-SD-NEXT:    mov h0, v0.h[6]
1038; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1039; CHECK-SD-NEXT:    fcvt s0, h0
1040; CHECK-SD-NEXT:    bl sinf
1041; CHECK-SD-NEXT:    fcvt h0, s0
1042; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1043; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
1044; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1045; CHECK-SD-NEXT:    mov h0, v0.h[7]
1046; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1047; CHECK-SD-NEXT:    fcvt s0, h0
1048; CHECK-SD-NEXT:    bl sinf
1049; CHECK-SD-NEXT:    fcvt h0, s0
1050; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1051; CHECK-SD-NEXT:    mov v1.h[7], v0.h[0]
1052; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1053; CHECK-SD-NEXT:    mov h0, v0.h[1]
1054; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1055; CHECK-SD-NEXT:    fcvt s0, h0
1056; CHECK-SD-NEXT:    bl sinf
1057; CHECK-SD-NEXT:    fcvt h0, s0
1058; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1059; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1060; CHECK-SD-NEXT:    fcvt s0, h0
1061; CHECK-SD-NEXT:    bl sinf
1062; CHECK-SD-NEXT:    fcvt h0, s0
1063; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1064; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
1065; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1066; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1067; CHECK-SD-NEXT:    mov h0, v0.h[2]
1068; CHECK-SD-NEXT:    fcvt s0, h0
1069; CHECK-SD-NEXT:    bl sinf
1070; CHECK-SD-NEXT:    fcvt h0, s0
1071; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1072; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
1073; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1074; CHECK-SD-NEXT:    mov h0, v0.h[3]
1075; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1076; CHECK-SD-NEXT:    fcvt s0, h0
1077; CHECK-SD-NEXT:    bl sinf
1078; CHECK-SD-NEXT:    fcvt h0, s0
1079; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1080; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
1081; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1082; CHECK-SD-NEXT:    mov h0, v0.h[4]
1083; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1084; CHECK-SD-NEXT:    fcvt s0, h0
1085; CHECK-SD-NEXT:    bl sinf
1086; CHECK-SD-NEXT:    fcvt h0, s0
1087; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1088; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
1089; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1090; CHECK-SD-NEXT:    mov h0, v0.h[5]
1091; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1092; CHECK-SD-NEXT:    fcvt s0, h0
1093; CHECK-SD-NEXT:    bl sinf
1094; CHECK-SD-NEXT:    fcvt h0, s0
1095; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1096; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
1097; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1098; CHECK-SD-NEXT:    mov h0, v0.h[6]
1099; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1100; CHECK-SD-NEXT:    fcvt s0, h0
1101; CHECK-SD-NEXT:    bl sinf
1102; CHECK-SD-NEXT:    fcvt h0, s0
1103; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1104; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
1105; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1106; CHECK-SD-NEXT:    mov h0, v0.h[7]
1107; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1108; CHECK-SD-NEXT:    fcvt s0, h0
1109; CHECK-SD-NEXT:    bl sinf
1110; CHECK-SD-NEXT:    fmov s1, s0
1111; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1112; CHECK-SD-NEXT:    fcvt h2, s1
1113; CHECK-SD-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
1114; CHECK-SD-NEXT:    mov v1.h[7], v2.h[0]
1115; CHECK-SD-NEXT:    add sp, sp, #64
1116; CHECK-SD-NEXT:    ret
1117;
1118; CHECK-GI-LABEL: sin_v16f16:
1119; CHECK-GI:       // %bb.0: // %entry
1120; CHECK-GI-NEXT:    sub sp, sp, #320
1121; CHECK-GI-NEXT:    stp d15, d14, [sp, #240] // 16-byte Folded Spill
1122; CHECK-GI-NEXT:    stp d13, d12, [sp, #256] // 16-byte Folded Spill
1123; CHECK-GI-NEXT:    stp d11, d10, [sp, #272] // 16-byte Folded Spill
1124; CHECK-GI-NEXT:    stp d9, d8, [sp, #288] // 16-byte Folded Spill
1125; CHECK-GI-NEXT:    stp x29, x30, [sp, #304] // 16-byte Folded Spill
1126; CHECK-GI-NEXT:    .cfi_def_cfa_offset 320
1127; CHECK-GI-NEXT:    .cfi_offset w30, -8
1128; CHECK-GI-NEXT:    .cfi_offset w29, -16
1129; CHECK-GI-NEXT:    .cfi_offset b8, -24
1130; CHECK-GI-NEXT:    .cfi_offset b9, -32
1131; CHECK-GI-NEXT:    .cfi_offset b10, -40
1132; CHECK-GI-NEXT:    .cfi_offset b11, -48
1133; CHECK-GI-NEXT:    .cfi_offset b12, -56
1134; CHECK-GI-NEXT:    .cfi_offset b13, -64
1135; CHECK-GI-NEXT:    .cfi_offset b14, -72
1136; CHECK-GI-NEXT:    .cfi_offset b15, -80
1137; CHECK-GI-NEXT:    mov v2.16b, v1.16b
1138; CHECK-GI-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
1139; CHECK-GI-NEXT:    mov h14, v1.h[1]
1140; CHECK-GI-NEXT:    mov h1, v1.h[2]
1141; CHECK-GI-NEXT:    mov h15, v0.h[1]
1142; CHECK-GI-NEXT:    mov h8, v0.h[2]
1143; CHECK-GI-NEXT:    mov h9, v0.h[3]
1144; CHECK-GI-NEXT:    mov h10, v0.h[4]
1145; CHECK-GI-NEXT:    mov h11, v0.h[5]
1146; CHECK-GI-NEXT:    mov h12, v0.h[6]
1147; CHECK-GI-NEXT:    mov h13, v0.h[7]
1148; CHECK-GI-NEXT:    fcvt s0, h0
1149; CHECK-GI-NEXT:    str h1, [sp, #16] // 2-byte Folded Spill
1150; CHECK-GI-NEXT:    mov h1, v2.h[3]
1151; CHECK-GI-NEXT:    str h1, [sp, #32] // 2-byte Folded Spill
1152; CHECK-GI-NEXT:    mov h1, v2.h[4]
1153; CHECK-GI-NEXT:    str h1, [sp, #48] // 2-byte Folded Spill
1154; CHECK-GI-NEXT:    mov h1, v2.h[5]
1155; CHECK-GI-NEXT:    str h1, [sp, #64] // 2-byte Folded Spill
1156; CHECK-GI-NEXT:    mov h1, v2.h[6]
1157; CHECK-GI-NEXT:    str h1, [sp, #96] // 2-byte Folded Spill
1158; CHECK-GI-NEXT:    mov h1, v2.h[7]
1159; CHECK-GI-NEXT:    str h1, [sp, #160] // 2-byte Folded Spill
1160; CHECK-GI-NEXT:    bl sinf
1161; CHECK-GI-NEXT:    fcvt s1, h15
1162; CHECK-GI-NEXT:    fcvt h0, s0
1163; CHECK-GI-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
1164; CHECK-GI-NEXT:    fmov s0, s1
1165; CHECK-GI-NEXT:    bl sinf
1166; CHECK-GI-NEXT:    fcvt s1, h8
1167; CHECK-GI-NEXT:    fcvt h0, s0
1168; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
1169; CHECK-GI-NEXT:    fmov s0, s1
1170; CHECK-GI-NEXT:    bl sinf
1171; CHECK-GI-NEXT:    fcvt s1, h9
1172; CHECK-GI-NEXT:    fcvt h0, s0
1173; CHECK-GI-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
1174; CHECK-GI-NEXT:    fmov s0, s1
1175; CHECK-GI-NEXT:    bl sinf
1176; CHECK-GI-NEXT:    fcvt s1, h10
1177; CHECK-GI-NEXT:    fcvt h0, s0
1178; CHECK-GI-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
1179; CHECK-GI-NEXT:    fmov s0, s1
1180; CHECK-GI-NEXT:    bl sinf
1181; CHECK-GI-NEXT:    fcvt s1, h11
1182; CHECK-GI-NEXT:    fcvt h0, s0
1183; CHECK-GI-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
1184; CHECK-GI-NEXT:    fmov s0, s1
1185; CHECK-GI-NEXT:    bl sinf
1186; CHECK-GI-NEXT:    fcvt s1, h12
1187; CHECK-GI-NEXT:    fcvt h0, s0
1188; CHECK-GI-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
1189; CHECK-GI-NEXT:    fmov s0, s1
1190; CHECK-GI-NEXT:    bl sinf
1191; CHECK-GI-NEXT:    fcvt s1, h13
1192; CHECK-GI-NEXT:    fcvt h0, s0
1193; CHECK-GI-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
1194; CHECK-GI-NEXT:    fmov s0, s1
1195; CHECK-GI-NEXT:    bl sinf
1196; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
1197; CHECK-GI-NEXT:    fcvt h0, s0
1198; CHECK-GI-NEXT:    fcvt s1, h1
1199; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
1200; CHECK-GI-NEXT:    fmov s0, s1
1201; CHECK-GI-NEXT:    bl sinf
1202; CHECK-GI-NEXT:    fcvt s1, h14
1203; CHECK-GI-NEXT:    fcvt h0, s0
1204; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1205; CHECK-GI-NEXT:    fmov s0, s1
1206; CHECK-GI-NEXT:    bl sinf
1207; CHECK-GI-NEXT:    ldr h1, [sp, #16] // 2-byte Folded Reload
1208; CHECK-GI-NEXT:    fcvt h0, s0
1209; CHECK-GI-NEXT:    fcvt s1, h1
1210; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1211; CHECK-GI-NEXT:    fmov s0, s1
1212; CHECK-GI-NEXT:    bl sinf
1213; CHECK-GI-NEXT:    ldr h1, [sp, #32] // 2-byte Folded Reload
1214; CHECK-GI-NEXT:    fcvt h0, s0
1215; CHECK-GI-NEXT:    fcvt s1, h1
1216; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1217; CHECK-GI-NEXT:    fmov s0, s1
1218; CHECK-GI-NEXT:    bl sinf
1219; CHECK-GI-NEXT:    ldr h1, [sp, #48] // 2-byte Folded Reload
1220; CHECK-GI-NEXT:    fcvt h0, s0
1221; CHECK-GI-NEXT:    fcvt s1, h1
1222; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
1223; CHECK-GI-NEXT:    fmov s0, s1
1224; CHECK-GI-NEXT:    bl sinf
1225; CHECK-GI-NEXT:    ldr h1, [sp, #64] // 2-byte Folded Reload
1226; CHECK-GI-NEXT:    fcvt h0, s0
1227; CHECK-GI-NEXT:    fcvt s1, h1
1228; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
1229; CHECK-GI-NEXT:    fmov s0, s1
1230; CHECK-GI-NEXT:    bl sinf
1231; CHECK-GI-NEXT:    ldr h1, [sp, #96] // 2-byte Folded Reload
1232; CHECK-GI-NEXT:    fcvt h0, s0
1233; CHECK-GI-NEXT:    fcvt s1, h1
1234; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
1235; CHECK-GI-NEXT:    fmov s0, s1
1236; CHECK-GI-NEXT:    bl sinf
1237; CHECK-GI-NEXT:    ldr h1, [sp, #160] // 2-byte Folded Reload
1238; CHECK-GI-NEXT:    fcvt h0, s0
1239; CHECK-GI-NEXT:    fcvt s1, h1
1240; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
1241; CHECK-GI-NEXT:    fmov s0, s1
1242; CHECK-GI-NEXT:    bl sinf
1243; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
1244; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
1245; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
1246; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
1247; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
1248; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
1249; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
1250; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
1251; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
1252; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
1253; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
1254; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
1255; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
1256; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
1257; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
1258; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
1259; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
1260; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]
1261; CHECK-GI-NEXT:    ldr q2, [sp, #176] // 16-byte Folded Reload
1262; CHECK-GI-NEXT:    mov v3.h[4], v2.h[0]
1263; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
1264; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
1265; CHECK-GI-NEXT:    ldr q2, [sp, #144] // 16-byte Folded Reload
1266; CHECK-GI-NEXT:    mov v3.h[5], v2.h[0]
1267; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
1268; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
1269; CHECK-GI-NEXT:    fcvt h2, s0
1270; CHECK-GI-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
1271; CHECK-GI-NEXT:    mov v3.h[6], v0.h[0]
1272; CHECK-GI-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
1273; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
1274; CHECK-GI-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
1275; CHECK-GI-NEXT:    mov v3.h[7], v0.h[0]
1276; CHECK-GI-NEXT:    mov v1.h[7], v2.h[0]
1277; CHECK-GI-NEXT:    mov v0.16b, v3.16b
1278; CHECK-GI-NEXT:    add sp, sp, #320
1279; CHECK-GI-NEXT:    ret
1280entry:
1281  %c = call <16 x half> @llvm.sin.v16f16(<16 x half> %a)
1282  ret <16 x half> %c
1283}
1284
; llvm.sin on <2 x fp128>. The checks use the prefix shared by both RUN
; lines, so the default and -global-isel runs must produce the same code:
; two sequential calls to sinl. The second element (q1) is spilled to
; [sp, #16] across the first call, and the first result is spilled to [sp]
; across the second call; the results are returned in q0 and q1.
1285define <2 x fp128> @sin_v2fp128(<2 x fp128> %a) {
1286; CHECK-LABEL: sin_v2fp128:
1287; CHECK:       // %bb.0: // %entry
1288; CHECK-NEXT:    sub sp, sp, #48
1289; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1290; CHECK-NEXT:    .cfi_def_cfa_offset 48
1291; CHECK-NEXT:    .cfi_offset w30, -16
1292; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1293; CHECK-NEXT:    bl sinl
1294; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
1295; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1296; CHECK-NEXT:    bl sinl
1297; CHECK-NEXT:    mov v1.16b, v0.16b
1298; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1299; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1300; CHECK-NEXT:    add sp, sp, #48
1301; CHECK-NEXT:    ret
1302entry:
1303  %c = call <2 x fp128> @llvm.sin.v2fp128(<2 x fp128> %a)
1304  ret <2 x fp128> %c
1305}
1306
; Scalar double cosine. Both runs share one expectation: the only
; instruction checked is a plain branch `b cos`, i.e. the intrinsic becomes a
; tail call to the `cos` symbol (presumably the libm routine) with no frame
; setup.
1307define double @cos_f64(double %a) {
1308; CHECK-LABEL: cos_f64:
1309; CHECK:       // %bb.0: // %entry
1310; CHECK-NEXT:    b cos
1311entry:
1312  %c = call double @llvm.cos.f64(double %a)
1313  ret double %c
1314}
1315
; Scalar float cosine. Both runs share one expectation: the only instruction
; checked is a plain branch `b cosf`, i.e. the intrinsic becomes a tail call
; to the `cosf` symbol (presumably the libm routine) with no frame setup.
1316define float @cos_f32(float %a) {
1317; CHECK-LABEL: cos_f32:
1318; CHECK:       // %bb.0: // %entry
1319; CHECK-NEXT:    b cosf
1320entry:
1321  %c = call float @llvm.cos.f32(float %a)
1322  ret float %c
1323}
1324
; Scalar half cosine. Both runs share one expectation: the argument is
; widened to float (fcvt s0, h0), cosf is called, and the result is narrowed
; back to half (fcvt h0, s0). Because work remains after the call, it is a
; real `bl` rather than a tail branch, so x30 is pushed before and popped
; after it.
1325define half @cos_f16(half %a) {
1326; CHECK-LABEL: cos_f16:
1327; CHECK:       // %bb.0: // %entry
1328; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
1329; CHECK-NEXT:    .cfi_def_cfa_offset 16
1330; CHECK-NEXT:    .cfi_offset w30, -16
1331; CHECK-NEXT:    fcvt s0, h0
1332; CHECK-NEXT:    bl cosf
1333; CHECK-NEXT:    fcvt h0, s0
1334; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
1335; CHECK-NEXT:    ret
1336entry:
1337  %c = call half @llvm.cos.f16(half %a)
1338  ret half %c
1339}
1340
; Scalar fp128 cosine. Both runs share one expectation: the only instruction
; checked is a plain branch `b cosl`, i.e. the intrinsic becomes a tail call
; to the `cosl` symbol (presumably the long-double libm routine).
1341define fp128 @cos_fp128(fp128 %a) {
1342; CHECK-LABEL: cos_fp128:
1343; CHECK:       // %bb.0: // %entry
1344; CHECK-NEXT:    b cosl
1345entry:
1346  %c = call fp128 @llvm.cos.fp128(fp128 %a)
1347  ret fp128 %c
1348}
1349
; Cosine of a single-element double vector. Both runs share one expectation:
; one call to `cos`, emitted as `bl` with x30 saved and restored around it
; (unlike the scalar double case, which is checked as a bare `b cos`).
; This function has no named entry block, so the block comment that is
; checked is just `// %bb.0:` without a trailing `// %entry`.
1350define <1 x double> @cos_v1f64(<1 x double> %x) {
1351; CHECK-LABEL: cos_v1f64:
1352; CHECK:       // %bb.0:
1353; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
1354; CHECK-NEXT:    .cfi_def_cfa_offset 16
1355; CHECK-NEXT:    .cfi_offset w30, -16
1356; CHECK-NEXT:    bl cos
1357; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
1358; CHECK-NEXT:    ret
1359  %c = call <1 x double> @llvm.cos.v1f64(<1 x double> %x)
1360  ret <1 x double> %c
1361}
1362
; llvm.cos on <2 x double>. Both runs expect two scalar calls to `cos`, but
; keep values alive across the calls differently:
;  * Default (non -global-isel) run: the whole input vector is spilled to
;    [sp]; lane 1 is computed first and its result parked in [sp, #16], then
;    lane 0 is computed and lane 1 is inserted into the returned q0.
;  * -global-isel run: lane 1 is kept in callee-saved d8 (saved/restored in
;    the frame); lane 0 is computed first and spilled to [sp], then lane 1,
;    and the vector is built in v1 before being copied to v0.
1363define <2 x double> @cos_v2f64(<2 x double> %a) {
1364; CHECK-SD-LABEL: cos_v2f64:
1365; CHECK-SD:       // %bb.0: // %entry
1366; CHECK-SD-NEXT:    sub sp, sp, #48
1367; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1368; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
1369; CHECK-SD-NEXT:    .cfi_offset w30, -16
1370; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1371; CHECK-SD-NEXT:    mov d0, v0.d[1]
1372; CHECK-SD-NEXT:    bl cos
1373; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1374; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1375; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1376; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1377; CHECK-SD-NEXT:    bl cos
1378; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1379; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1380; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1381; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
1382; CHECK-SD-NEXT:    add sp, sp, #48
1383; CHECK-SD-NEXT:    ret
1384;
1385; CHECK-GI-LABEL: cos_v2f64:
1386; CHECK-GI:       // %bb.0: // %entry
1387; CHECK-GI-NEXT:    sub sp, sp, #32
1388; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
1389; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
1390; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
1391; CHECK-GI-NEXT:    .cfi_offset w30, -8
1392; CHECK-GI-NEXT:    .cfi_offset b8, -16
1393; CHECK-GI-NEXT:    mov d8, v0.d[1]
1394; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1395; CHECK-GI-NEXT:    bl cos
1396; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1397; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1398; CHECK-GI-NEXT:    fmov d0, d8
1399; CHECK-GI-NEXT:    bl cos
1400; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1401; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1402; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
1403; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
1404; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
1405; CHECK-GI-NEXT:    mov v0.16b, v1.16b
1406; CHECK-GI-NEXT:    add sp, sp, #32
1407; CHECK-GI-NEXT:    ret
1408entry:
1409  %c = call <2 x double> @llvm.cos.v2f64(<2 x double> %a)
1410  ret <2 x double> %c
1411}
1412
; llvm.cos on <3 x double>. The three elements arrive in d0, d1 and d2 and
; are returned the same way. Both runs expect three sequential calls to
; `cos`, using callee-saved d8..d10 (saved/restored in a 32-byte frame) to
; hold pending inputs and finished results across the calls.
; The two runs differ only in register assignment: the default run keeps
; element 2 in d8 and element 1 in d9, while the -global-isel run keeps
; element 1 in d8 and element 2 in d9.
1413define <3 x double> @cos_v3f64(<3 x double> %a) {
1414; CHECK-SD-LABEL: cos_v3f64:
1415; CHECK-SD:       // %bb.0: // %entry
1416; CHECK-SD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
1417; CHECK-SD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
1418; CHECK-SD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
1419; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
1420; CHECK-SD-NEXT:    .cfi_offset w30, -8
1421; CHECK-SD-NEXT:    .cfi_offset b8, -16
1422; CHECK-SD-NEXT:    .cfi_offset b9, -24
1423; CHECK-SD-NEXT:    .cfi_offset b10, -32
1424; CHECK-SD-NEXT:    fmov d8, d2
1425; CHECK-SD-NEXT:    fmov d9, d1
1426; CHECK-SD-NEXT:    bl cos
1427; CHECK-SD-NEXT:    fmov d10, d0
1428; CHECK-SD-NEXT:    fmov d0, d9
1429; CHECK-SD-NEXT:    bl cos
1430; CHECK-SD-NEXT:    fmov d9, d0
1431; CHECK-SD-NEXT:    fmov d0, d8
1432; CHECK-SD-NEXT:    bl cos
1433; CHECK-SD-NEXT:    fmov d1, d9
1434; CHECK-SD-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
1435; CHECK-SD-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
1436; CHECK-SD-NEXT:    fmov d2, d0
1437; CHECK-SD-NEXT:    fmov d0, d10
1438; CHECK-SD-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
1439; CHECK-SD-NEXT:    ret
1440;
1441; CHECK-GI-LABEL: cos_v3f64:
1442; CHECK-GI:       // %bb.0: // %entry
1443; CHECK-GI-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
1444; CHECK-GI-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
1445; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
1446; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
1447; CHECK-GI-NEXT:    .cfi_offset w30, -8
1448; CHECK-GI-NEXT:    .cfi_offset b8, -16
1449; CHECK-GI-NEXT:    .cfi_offset b9, -24
1450; CHECK-GI-NEXT:    .cfi_offset b10, -32
1451; CHECK-GI-NEXT:    fmov d8, d1
1452; CHECK-GI-NEXT:    fmov d9, d2
1453; CHECK-GI-NEXT:    bl cos
1454; CHECK-GI-NEXT:    fmov d10, d0
1455; CHECK-GI-NEXT:    fmov d0, d8
1456; CHECK-GI-NEXT:    bl cos
1457; CHECK-GI-NEXT:    fmov d8, d0
1458; CHECK-GI-NEXT:    fmov d0, d9
1459; CHECK-GI-NEXT:    bl cos
1460; CHECK-GI-NEXT:    fmov d1, d8
1461; CHECK-GI-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
1462; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
1463; CHECK-GI-NEXT:    fmov d2, d0
1464; CHECK-GI-NEXT:    fmov d0, d10
1465; CHECK-GI-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
1466; CHECK-GI-NEXT:    ret
1467entry:
1468  %c = call <3 x double> @llvm.cos.v3f64(<3 x double> %a)
1469  ret <3 x double> %c
1470}
1471
; cos_v4f64: llvm.cos applied to a <4 x double> argument (arriving in q0 and q1).
; Both expectations make four scalar calls to `cos` and rebuild the two result
; vectors with `mov vN.d[1], ...` lane inserts.
;   CHECK-SD (default llc run): spills q0/q1 to the stack and reloads them to
;     extract each lane between calls; 64-byte frame.
;   CHECK-GI (-global-isel run): extracts the high lanes into d8/d9 before the
;     first call and spills q1; 80-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1472define <4 x double> @cos_v4f64(<4 x double> %a) {
1473; CHECK-SD-LABEL: cos_v4f64:
1474; CHECK-SD:       // %bb.0: // %entry
1475; CHECK-SD-NEXT:    sub sp, sp, #64
1476; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1477; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
1478; CHECK-SD-NEXT:    .cfi_offset w30, -16
1479; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1480; CHECK-SD-NEXT:    mov d0, v0.d[1]
1481; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1482; CHECK-SD-NEXT:    bl cos
1483; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1484; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1485; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1486; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1487; CHECK-SD-NEXT:    bl cos
1488; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1489; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1490; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
1491; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1492; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
1493; CHECK-SD-NEXT:    mov d0, v0.d[1]
1494; CHECK-SD-NEXT:    bl cos
1495; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1496; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1497; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
1498; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1499; CHECK-SD-NEXT:    bl cos
1500; CHECK-SD-NEXT:    fmov d1, d0
1501; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
1502; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1503; CHECK-SD-NEXT:    mov v1.d[1], v2.d[0]
1504; CHECK-SD-NEXT:    add sp, sp, #64
1505; CHECK-SD-NEXT:    ret
1506;
1507; CHECK-GI-LABEL: cos_v4f64:
1508; CHECK-GI:       // %bb.0: // %entry
1509; CHECK-GI-NEXT:    sub sp, sp, #80
1510; CHECK-GI-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
1511; CHECK-GI-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
1512; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
1513; CHECK-GI-NEXT:    .cfi_offset w30, -16
1514; CHECK-GI-NEXT:    .cfi_offset b8, -24
1515; CHECK-GI-NEXT:    .cfi_offset b9, -32
1516; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
1517; CHECK-GI-NEXT:    mov d8, v0.d[1]
1518; CHECK-GI-NEXT:    mov d9, v1.d[1]
1519; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1520; CHECK-GI-NEXT:    bl cos
1521; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1522; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1523; CHECK-GI-NEXT:    fmov d0, d8
1524; CHECK-GI-NEXT:    bl cos
1525; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1526; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1527; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1528; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1529; CHECK-GI-NEXT:    bl cos
1530; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1531; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1532; CHECK-GI-NEXT:    fmov d0, d9
1533; CHECK-GI-NEXT:    bl cos
1534; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
1535; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1536; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
1537; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1538; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
1539; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1540; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
1541; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1542; CHECK-GI-NEXT:    add sp, sp, #80
1543; CHECK-GI-NEXT:    ret
1544entry:
1545  %c = call <4 x double> @llvm.cos.v4f64(<4 x double> %a)
1546  ret <4 x double> %c
1547}
1548
; cos_v2f32: llvm.cos applied to a <2 x float> argument.
; Both expectations make two scalar calls to `cosf` and merge the results with a
; `mov vN.s[1], vM.s[0]` lane insert.
;   CHECK-SD (default llc run): spills the input vector to the stack and reloads
;     it for the second call; 48-byte frame.
;   CHECK-GI (-global-isel run): keeps lane 1 in s8 across the first call;
;     32-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1549define <2 x float> @cos_v2f32(<2 x float> %a) {
1550; CHECK-SD-LABEL: cos_v2f32:
1551; CHECK-SD:       // %bb.0: // %entry
1552; CHECK-SD-NEXT:    sub sp, sp, #48
1553; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1554; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
1555; CHECK-SD-NEXT:    .cfi_offset w30, -16
1556; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1557; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1558; CHECK-SD-NEXT:    mov s0, v0.s[1]
1559; CHECK-SD-NEXT:    bl cosf
1560; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1561; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1562; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1563; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
1564; CHECK-SD-NEXT:    bl cosf
1565; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1566; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1567; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1568; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
1569; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1570; CHECK-SD-NEXT:    add sp, sp, #48
1571; CHECK-SD-NEXT:    ret
1572;
1573; CHECK-GI-LABEL: cos_v2f32:
1574; CHECK-GI:       // %bb.0: // %entry
1575; CHECK-GI-NEXT:    sub sp, sp, #32
1576; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
1577; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
1578; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
1579; CHECK-GI-NEXT:    .cfi_offset w30, -8
1580; CHECK-GI-NEXT:    .cfi_offset b8, -16
1581; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
1582; CHECK-GI-NEXT:    mov s8, v0.s[1]
1583; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
1584; CHECK-GI-NEXT:    bl cosf
1585; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1586; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1587; CHECK-GI-NEXT:    fmov s0, s8
1588; CHECK-GI-NEXT:    bl cosf
1589; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1590; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1591; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
1592; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
1593; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
1594; CHECK-GI-NEXT:    fmov d0, d1
1595; CHECK-GI-NEXT:    add sp, sp, #32
1596; CHECK-GI-NEXT:    ret
1597entry:
1598  %c = call <2 x float> @llvm.cos.v2f32(<2 x float> %a)
1599  ret <2 x float> %c
1600}
1601
; cos_v3f32: llvm.cos applied to a <3 x float> argument.
; Both expectations make three scalar calls to `cosf` (lanes 0, 1 and 2) and
; insert the results into lanes s[1] and s[2] of the returned vector.
;   CHECK-SD (default llc run): spills the input vector and partial result to
;     the stack between calls; 48-byte frame.
;   CHECK-GI (-global-isel run): extracts lanes 1 and 2 into s8/s9 before the
;     first call; 64-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1602define <3 x float> @cos_v3f32(<3 x float> %a) {
1603; CHECK-SD-LABEL: cos_v3f32:
1604; CHECK-SD:       // %bb.0: // %entry
1605; CHECK-SD-NEXT:    sub sp, sp, #48
1606; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1607; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
1608; CHECK-SD-NEXT:    .cfi_offset w30, -16
1609; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1610; CHECK-SD-NEXT:    mov s0, v0.s[1]
1611; CHECK-SD-NEXT:    bl cosf
1612; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1613; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1614; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1615; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
1616; CHECK-SD-NEXT:    bl cosf
1617; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1618; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1619; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
1620; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1621; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1622; CHECK-SD-NEXT:    mov s0, v0.s[2]
1623; CHECK-SD-NEXT:    bl cosf
1624; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1625; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1626; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1627; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
1628; CHECK-SD-NEXT:    mov v0.16b, v1.16b
1629; CHECK-SD-NEXT:    add sp, sp, #48
1630; CHECK-SD-NEXT:    ret
1631;
1632; CHECK-GI-LABEL: cos_v3f32:
1633; CHECK-GI:       // %bb.0: // %entry
1634; CHECK-GI-NEXT:    sub sp, sp, #64
1635; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
1636; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1637; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
1638; CHECK-GI-NEXT:    .cfi_offset w30, -16
1639; CHECK-GI-NEXT:    .cfi_offset b8, -24
1640; CHECK-GI-NEXT:    .cfi_offset b9, -32
1641; CHECK-GI-NEXT:    mov s8, v0.s[1]
1642; CHECK-GI-NEXT:    mov s9, v0.s[2]
1643; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
1644; CHECK-GI-NEXT:    bl cosf
1645; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1646; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1647; CHECK-GI-NEXT:    fmov s0, s8
1648; CHECK-GI-NEXT:    bl cosf
1649; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1650; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1651; CHECK-GI-NEXT:    fmov s0, s9
1652; CHECK-GI-NEXT:    bl cosf
1653; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
1654; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1655; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1656; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
1657; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
1658; CHECK-GI-NEXT:    mov v1.s[2], v0.s[0]
1659; CHECK-GI-NEXT:    mov v0.16b, v1.16b
1660; CHECK-GI-NEXT:    add sp, sp, #64
1661; CHECK-GI-NEXT:    ret
1662entry:
1663  %c = call <3 x float> @llvm.cos.v3f32(<3 x float> %a)
1664  ret <3 x float> %c
1665}
1666
; cos_v4f32: llvm.cos applied to a <4 x float> argument.
; Both expectations make four scalar calls to `cosf`, one per lane, and rebuild
; the result with `mov vN.s[i], vM.s[0]` lane inserts.
;   CHECK-SD (default llc run): reloads the spilled input vector to extract each
;     lane and spills the partial result between calls; 48-byte frame.
;   CHECK-GI (-global-isel run): extracts lanes 1-3 into s8-s10 before the first
;     call and spills each scalar result; 80-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1667define <4 x float> @cos_v4f32(<4 x float> %a) {
1668; CHECK-SD-LABEL: cos_v4f32:
1669; CHECK-SD:       // %bb.0: // %entry
1670; CHECK-SD-NEXT:    sub sp, sp, #48
1671; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1672; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
1673; CHECK-SD-NEXT:    .cfi_offset w30, -16
1674; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1675; CHECK-SD-NEXT:    mov s0, v0.s[1]
1676; CHECK-SD-NEXT:    bl cosf
1677; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1678; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1679; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1680; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
1681; CHECK-SD-NEXT:    bl cosf
1682; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1683; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1684; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
1685; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1686; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1687; CHECK-SD-NEXT:    mov s0, v0.s[2]
1688; CHECK-SD-NEXT:    bl cosf
1689; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1690; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1691; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
1692; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1693; CHECK-SD-NEXT:    mov s0, v0.s[3]
1694; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
1695; CHECK-SD-NEXT:    bl cosf
1696; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1697; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1698; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1699; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
1700; CHECK-SD-NEXT:    mov v0.16b, v1.16b
1701; CHECK-SD-NEXT:    add sp, sp, #48
1702; CHECK-SD-NEXT:    ret
1703;
1704; CHECK-GI-LABEL: cos_v4f32:
1705; CHECK-GI:       // %bb.0: // %entry
1706; CHECK-GI-NEXT:    sub sp, sp, #80
1707; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
1708; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
1709; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
1710; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
1711; CHECK-GI-NEXT:    .cfi_offset w30, -8
1712; CHECK-GI-NEXT:    .cfi_offset b8, -16
1713; CHECK-GI-NEXT:    .cfi_offset b9, -24
1714; CHECK-GI-NEXT:    .cfi_offset b10, -32
1715; CHECK-GI-NEXT:    mov s8, v0.s[1]
1716; CHECK-GI-NEXT:    mov s9, v0.s[2]
1717; CHECK-GI-NEXT:    mov s10, v0.s[3]
1718; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
1719; CHECK-GI-NEXT:    bl cosf
1720; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1721; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1722; CHECK-GI-NEXT:    fmov s0, s8
1723; CHECK-GI-NEXT:    bl cosf
1724; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1725; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1726; CHECK-GI-NEXT:    fmov s0, s9
1727; CHECK-GI-NEXT:    bl cosf
1728; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1729; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1730; CHECK-GI-NEXT:    fmov s0, s10
1731; CHECK-GI-NEXT:    bl cosf
1732; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
1733; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1734; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
1735; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
1736; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
1737; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
1738; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
1739; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
1740; CHECK-GI-NEXT:    mov v1.s[3], v0.s[0]
1741; CHECK-GI-NEXT:    mov v0.16b, v1.16b
1742; CHECK-GI-NEXT:    add sp, sp, #80
1743; CHECK-GI-NEXT:    ret
1744entry:
1745  %c = call <4 x float> @llvm.cos.v4f32(<4 x float> %a)
1746  ret <4 x float> %c
1747}
1748
; cos_v8f32: llvm.cos applied to an <8 x float> argument (arriving in q0 and q1).
; Both expectations make eight scalar calls to `cosf`, four per input register,
; and rebuild the two result vectors with `mov vN.s[i], vM.s[0]` lane inserts.
;   CHECK-SD (default llc run): spills q0/q1 with one stp and reloads them to
;     extract each lane; 64-byte frame.
;   CHECK-GI (-global-isel run): extracts six lanes into s8-s13 before the first
;     call, spills q1, and spills each scalar result; 176-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1749define <8 x float> @cos_v8f32(<8 x float> %a) {
1750; CHECK-SD-LABEL: cos_v8f32:
1751; CHECK-SD:       // %bb.0: // %entry
1752; CHECK-SD-NEXT:    sub sp, sp, #64
1753; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1754; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
1755; CHECK-SD-NEXT:    .cfi_offset w30, -16
1756; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
1757; CHECK-SD-NEXT:    mov s0, v0.s[1]
1758; CHECK-SD-NEXT:    bl cosf
1759; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1760; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1761; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1762; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
1763; CHECK-SD-NEXT:    bl cosf
1764; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1765; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1766; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
1767; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1768; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1769; CHECK-SD-NEXT:    mov s0, v0.s[2]
1770; CHECK-SD-NEXT:    bl cosf
1771; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1772; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1773; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
1774; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1775; CHECK-SD-NEXT:    mov s0, v0.s[3]
1776; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1777; CHECK-SD-NEXT:    bl cosf
1778; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1779; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1780; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
1781; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1782; CHECK-SD-NEXT:    mov s0, v0.s[1]
1783; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1784; CHECK-SD-NEXT:    bl cosf
1785; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1786; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1787; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1788; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
1789; CHECK-SD-NEXT:    bl cosf
1790; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1791; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1792; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
1793; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1794; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1795; CHECK-SD-NEXT:    mov s0, v0.s[2]
1796; CHECK-SD-NEXT:    bl cosf
1797; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1798; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
1799; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
1800; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1801; CHECK-SD-NEXT:    mov s0, v0.s[3]
1802; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
1803; CHECK-SD-NEXT:    bl cosf
1804; CHECK-SD-NEXT:    fmov s2, s0
1805; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1806; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
1807; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1808; CHECK-SD-NEXT:    mov v1.s[3], v2.s[0]
1809; CHECK-SD-NEXT:    add sp, sp, #64
1810; CHECK-SD-NEXT:    ret
1811;
1812; CHECK-GI-LABEL: cos_v8f32:
1813; CHECK-GI:       // %bb.0: // %entry
1814; CHECK-GI-NEXT:    sub sp, sp, #176
1815; CHECK-GI-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
1816; CHECK-GI-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
1817; CHECK-GI-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
1818; CHECK-GI-NEXT:    str x30, [sp, #160] // 8-byte Folded Spill
1819; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
1820; CHECK-GI-NEXT:    .cfi_offset w30, -16
1821; CHECK-GI-NEXT:    .cfi_offset b8, -24
1822; CHECK-GI-NEXT:    .cfi_offset b9, -32
1823; CHECK-GI-NEXT:    .cfi_offset b10, -40
1824; CHECK-GI-NEXT:    .cfi_offset b11, -48
1825; CHECK-GI-NEXT:    .cfi_offset b12, -56
1826; CHECK-GI-NEXT:    .cfi_offset b13, -64
1827; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
1828; CHECK-GI-NEXT:    mov s8, v0.s[1]
1829; CHECK-GI-NEXT:    mov s9, v0.s[2]
1830; CHECK-GI-NEXT:    mov s10, v0.s[3]
1831; CHECK-GI-NEXT:    mov s11, v1.s[1]
1832; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
1833; CHECK-GI-NEXT:    mov s12, v1.s[2]
1834; CHECK-GI-NEXT:    mov s13, v1.s[3]
1835; CHECK-GI-NEXT:    bl cosf
1836; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1837; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
1838; CHECK-GI-NEXT:    fmov s0, s8
1839; CHECK-GI-NEXT:    bl cosf
1840; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1841; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
1842; CHECK-GI-NEXT:    fmov s0, s9
1843; CHECK-GI-NEXT:    bl cosf
1844; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1845; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
1846; CHECK-GI-NEXT:    fmov s0, s10
1847; CHECK-GI-NEXT:    bl cosf
1848; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1849; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
1850; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
1851; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
1852; CHECK-GI-NEXT:    bl cosf
1853; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1854; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1855; CHECK-GI-NEXT:    fmov s0, s11
1856; CHECK-GI-NEXT:    bl cosf
1857; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1858; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1859; CHECK-GI-NEXT:    fmov s0, s12
1860; CHECK-GI-NEXT:    bl cosf
1861; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1862; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
1863; CHECK-GI-NEXT:    fmov s0, s13
1864; CHECK-GI-NEXT:    bl cosf
1865; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
1866; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
1867; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
1868; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
1869; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
1870; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
1871; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
1872; CHECK-GI-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
1873; CHECK-GI-NEXT:    mov v3.s[1], v2.s[0]
1874; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
1875; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
1876; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
1877; CHECK-GI-NEXT:    mov v3.s[2], v2.s[0]
1878; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
1879; CHECK-GI-NEXT:    mov v1.s[3], v2.s[0]
1880; CHECK-GI-NEXT:    mov v3.s[3], v0.s[0]
1881; CHECK-GI-NEXT:    mov v2.16b, v1.16b
1882; CHECK-GI-NEXT:    mov v1.16b, v3.16b
1883; CHECK-GI-NEXT:    mov v0.16b, v2.16b
1884; CHECK-GI-NEXT:    add sp, sp, #176
1885; CHECK-GI-NEXT:    ret
1886entry:
1887  %c = call <8 x float> @llvm.cos.v8f32(<8 x float> %a)
1888  ret <8 x float> %c
1889}
1890
; cos_v7f16: llvm.cos applied to a <7 x half> argument.
; Each lane is converted half -> single with fcvt, passed to `cosf`, and the
; result converted back to half before being inserted into the result vector.
;   CHECK-SD (default llc run): eight `cosf` calls, covering lanes h[0]..h[7]
;     (one more lane than the IR type has); 48-byte frame.
;   CHECK-GI (-global-isel run): seven `cosf` calls, lanes h[0]..h[6], with
;     lanes 1-6 held in h8-h13 across the calls; 160-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
1891define <7 x half> @cos_v7f16(<7 x half> %a) {
1892; CHECK-SD-LABEL: cos_v7f16:
1893; CHECK-SD:       // %bb.0: // %entry
1894; CHECK-SD-NEXT:    sub sp, sp, #48
1895; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
1896; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
1897; CHECK-SD-NEXT:    .cfi_offset w30, -16
1898; CHECK-SD-NEXT:    mov h1, v0.h[1]
1899; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
1900; CHECK-SD-NEXT:    fcvt s0, h1
1901; CHECK-SD-NEXT:    bl cosf
1902; CHECK-SD-NEXT:    fcvt h0, s0
1903; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1904; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1905; CHECK-SD-NEXT:    fcvt s0, h0
1906; CHECK-SD-NEXT:    bl cosf
1907; CHECK-SD-NEXT:    fcvt h0, s0
1908; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1909; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
1910; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
1911; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1912; CHECK-SD-NEXT:    mov h0, v0.h[2]
1913; CHECK-SD-NEXT:    fcvt s0, h0
1914; CHECK-SD-NEXT:    bl cosf
1915; CHECK-SD-NEXT:    fcvt h0, s0
1916; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1917; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
1918; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1919; CHECK-SD-NEXT:    mov h0, v0.h[3]
1920; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1921; CHECK-SD-NEXT:    fcvt s0, h0
1922; CHECK-SD-NEXT:    bl cosf
1923; CHECK-SD-NEXT:    fcvt h0, s0
1924; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1925; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
1926; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1927; CHECK-SD-NEXT:    mov h0, v0.h[4]
1928; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1929; CHECK-SD-NEXT:    fcvt s0, h0
1930; CHECK-SD-NEXT:    bl cosf
1931; CHECK-SD-NEXT:    fcvt h0, s0
1932; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1933; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
1934; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1935; CHECK-SD-NEXT:    mov h0, v0.h[5]
1936; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1937; CHECK-SD-NEXT:    fcvt s0, h0
1938; CHECK-SD-NEXT:    bl cosf
1939; CHECK-SD-NEXT:    fcvt h0, s0
1940; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1941; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
1942; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1943; CHECK-SD-NEXT:    mov h0, v0.h[6]
1944; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1945; CHECK-SD-NEXT:    fcvt s0, h0
1946; CHECK-SD-NEXT:    bl cosf
1947; CHECK-SD-NEXT:    fcvt h0, s0
1948; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
1949; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
1950; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
1951; CHECK-SD-NEXT:    mov h0, v0.h[7]
1952; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
1953; CHECK-SD-NEXT:    fcvt s0, h0
1954; CHECK-SD-NEXT:    bl cosf
1955; CHECK-SD-NEXT:    fcvt h1, s0
1956; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
1957; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
1958; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
1959; CHECK-SD-NEXT:    add sp, sp, #48
1960; CHECK-SD-NEXT:    ret
1961;
1962; CHECK-GI-LABEL: cos_v7f16:
1963; CHECK-GI:       // %bb.0: // %entry
1964; CHECK-GI-NEXT:    sub sp, sp, #160
1965; CHECK-GI-NEXT:    stp d13, d12, [sp, #96] // 16-byte Folded Spill
1966; CHECK-GI-NEXT:    stp d11, d10, [sp, #112] // 16-byte Folded Spill
1967; CHECK-GI-NEXT:    stp d9, d8, [sp, #128] // 16-byte Folded Spill
1968; CHECK-GI-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
1969; CHECK-GI-NEXT:    .cfi_def_cfa_offset 160
1970; CHECK-GI-NEXT:    .cfi_offset w30, -16
1971; CHECK-GI-NEXT:    .cfi_offset b8, -24
1972; CHECK-GI-NEXT:    .cfi_offset b9, -32
1973; CHECK-GI-NEXT:    .cfi_offset b10, -40
1974; CHECK-GI-NEXT:    .cfi_offset b11, -48
1975; CHECK-GI-NEXT:    .cfi_offset b12, -56
1976; CHECK-GI-NEXT:    .cfi_offset b13, -64
1977; CHECK-GI-NEXT:    mov h8, v0.h[1]
1978; CHECK-GI-NEXT:    mov h9, v0.h[2]
1979; CHECK-GI-NEXT:    mov h10, v0.h[3]
1980; CHECK-GI-NEXT:    mov h11, v0.h[4]
1981; CHECK-GI-NEXT:    mov h12, v0.h[5]
1982; CHECK-GI-NEXT:    mov h13, v0.h[6]
1983; CHECK-GI-NEXT:    fcvt s0, h0
1984; CHECK-GI-NEXT:    bl cosf
1985; CHECK-GI-NEXT:    fcvt s1, h8
1986; CHECK-GI-NEXT:    fcvt h0, s0
1987; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
1988; CHECK-GI-NEXT:    fmov s0, s1
1989; CHECK-GI-NEXT:    bl cosf
1990; CHECK-GI-NEXT:    fcvt s1, h9
1991; CHECK-GI-NEXT:    fcvt h0, s0
1992; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
1993; CHECK-GI-NEXT:    fmov s0, s1
1994; CHECK-GI-NEXT:    bl cosf
1995; CHECK-GI-NEXT:    fcvt s1, h10
1996; CHECK-GI-NEXT:    fcvt h0, s0
1997; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
1998; CHECK-GI-NEXT:    fmov s0, s1
1999; CHECK-GI-NEXT:    bl cosf
2000; CHECK-GI-NEXT:    fcvt s1, h11
2001; CHECK-GI-NEXT:    fcvt h0, s0
2002; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2003; CHECK-GI-NEXT:    fmov s0, s1
2004; CHECK-GI-NEXT:    bl cosf
2005; CHECK-GI-NEXT:    fcvt s1, h12
2006; CHECK-GI-NEXT:    fcvt h0, s0
2007; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2008; CHECK-GI-NEXT:    fmov s0, s1
2009; CHECK-GI-NEXT:    bl cosf
2010; CHECK-GI-NEXT:    fcvt s1, h13
2011; CHECK-GI-NEXT:    fcvt h0, s0
2012; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
2013; CHECK-GI-NEXT:    fmov s0, s1
2014; CHECK-GI-NEXT:    bl cosf
2015; CHECK-GI-NEXT:    ldp q3, q2, [sp, #48] // 32-byte Folded Reload
2016; CHECK-GI-NEXT:    fcvt h0, s0
2017; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
2018; CHECK-GI-NEXT:    ldp d9, d8, [sp, #128] // 16-byte Folded Reload
2019; CHECK-GI-NEXT:    ldp d11, d10, [sp, #112] // 16-byte Folded Reload
2020; CHECK-GI-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
2021; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
2022; CHECK-GI-NEXT:    ldp d13, d12, [sp, #96] // 16-byte Folded Reload
2023; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
2024; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
2025; CHECK-GI-NEXT:    mov v1.h[3], v3.h[0]
2026; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
2027; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
2028; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
2029; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
2030; CHECK-GI-NEXT:    mov v0.16b, v1.16b
2031; CHECK-GI-NEXT:    add sp, sp, #160
2032; CHECK-GI-NEXT:    ret
2033entry:
2034  %c = call <7 x half> @llvm.cos.v7f16(<7 x half> %a)
2035  ret <7 x half> %c
2036}
2037
; cos_v4f16: llvm.cos applied to a <4 x half> argument.
; Both expectations make four `cosf` calls; each lane is converted half -> single
; with fcvt before the call and back to half afterwards, then inserted into the
; result with `mov vN.h[i], vM.h[0]`.
;   CHECK-SD (default llc run): reloads the spilled input vector to extract each
;     lane; 48-byte frame.
;   CHECK-GI (-global-isel run): extracts lanes 1-3 into h8-h10 before the first
;     call; 80-byte frame.
; The CHECK lines are autogenerated (see the NOTE at the top of the file).
2038define <4 x half> @cos_v4f16(<4 x half> %a) {
2039; CHECK-SD-LABEL: cos_v4f16:
2040; CHECK-SD:       // %bb.0: // %entry
2041; CHECK-SD-NEXT:    sub sp, sp, #48
2042; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
2043; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
2044; CHECK-SD-NEXT:    .cfi_offset w30, -16
2045; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2046; CHECK-SD-NEXT:    mov h1, v0.h[1]
2047; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2048; CHECK-SD-NEXT:    fcvt s0, h1
2049; CHECK-SD-NEXT:    bl cosf
2050; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2051; CHECK-SD-NEXT:    fcvt h0, s0
2052; CHECK-SD-NEXT:    fcvt s1, h1
2053; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
2054; CHECK-SD-NEXT:    fmov s0, s1
2055; CHECK-SD-NEXT:    bl cosf
2056; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2057; CHECK-SD-NEXT:    fcvt h2, s0
2058; CHECK-SD-NEXT:    mov h1, v1.h[2]
2059; CHECK-SD-NEXT:    fcvt s0, h1
2060; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
2061; CHECK-SD-NEXT:    mov v2.h[1], v1.h[0]
2062; CHECK-SD-NEXT:    str q2, [sp] // 16-byte Folded Spill
2063; CHECK-SD-NEXT:    bl cosf
2064; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2065; CHECK-SD-NEXT:    fcvt h2, s0
2066; CHECK-SD-NEXT:    mov h1, v1.h[3]
2067; CHECK-SD-NEXT:    fcvt s0, h1
2068; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
2069; CHECK-SD-NEXT:    mov v1.h[2], v2.h[0]
2070; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
2071; CHECK-SD-NEXT:    bl cosf
2072; CHECK-SD-NEXT:    fcvt h1, s0
2073; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2074; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
2075; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
2076; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2077; CHECK-SD-NEXT:    add sp, sp, #48
2078; CHECK-SD-NEXT:    ret
2079;
2080; CHECK-GI-LABEL: cos_v4f16:
2081; CHECK-GI:       // %bb.0: // %entry
2082; CHECK-GI-NEXT:    sub sp, sp, #80
2083; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
2084; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
2085; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
2086; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
2087; CHECK-GI-NEXT:    .cfi_offset w30, -8
2088; CHECK-GI-NEXT:    .cfi_offset b8, -16
2089; CHECK-GI-NEXT:    .cfi_offset b9, -24
2090; CHECK-GI-NEXT:    .cfi_offset b10, -32
2091; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
2092; CHECK-GI-NEXT:    mov h8, v0.h[1]
2093; CHECK-GI-NEXT:    mov h9, v0.h[2]
2094; CHECK-GI-NEXT:    mov h10, v0.h[3]
2095; CHECK-GI-NEXT:    fcvt s0, h0
2096; CHECK-GI-NEXT:    bl cosf
2097; CHECK-GI-NEXT:    fcvt s1, h8
2098; CHECK-GI-NEXT:    fcvt h0, s0
2099; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2100; CHECK-GI-NEXT:    fmov s0, s1
2101; CHECK-GI-NEXT:    bl cosf
2102; CHECK-GI-NEXT:    fcvt s1, h9
2103; CHECK-GI-NEXT:    fcvt h0, s0
2104; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
2105; CHECK-GI-NEXT:    fmov s0, s1
2106; CHECK-GI-NEXT:    bl cosf
2107; CHECK-GI-NEXT:    fcvt s1, h10
2108; CHECK-GI-NEXT:    fcvt h0, s0
2109; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2110; CHECK-GI-NEXT:    fmov s0, s1
2111; CHECK-GI-NEXT:    bl cosf
2112; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
2113; CHECK-GI-NEXT:    fcvt h0, s0
2114; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2115; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
2116; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
2117; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
2118; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
2119; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
2120; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
2121; CHECK-GI-NEXT:    mov v0.16b, v1.16b
2122; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2123; CHECK-GI-NEXT:    add sp, sp, #80
2124; CHECK-GI-NEXT:    ret
2125entry:
2126  %c = call <4 x half> @llvm.cos.v4f16(<4 x half> %a)
2127  ret <4 x half> %c
2128}
2129
; Lowering of llvm.cos on <8 x half>. The expected code is scalarized: cosf is
; called once per lane (eight calls), with each half lane widened to float by
; fcvt before the call and narrowed back to half afterwards.
;  - SelectionDAG output (first RUN line) spills the input vector and the
;    partially built result to the stack and reloads them around every call.
;  - GlobalISel output (second RUN line) extracts lanes 1-7 into callee-saved
;    d8-d14 up front, spills each converted result, and assembles the result
;    vector after the last call.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2130define <8 x half> @cos_v8f16(<8 x half> %a) {
2131; CHECK-SD-LABEL: cos_v8f16:
2132; CHECK-SD:       // %bb.0: // %entry
2133; CHECK-SD-NEXT:    sub sp, sp, #48
2134; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
2135; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
2136; CHECK-SD-NEXT:    .cfi_offset w30, -16
2137; CHECK-SD-NEXT:    mov h1, v0.h[1]
2138; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
2139; CHECK-SD-NEXT:    fcvt s0, h1
2140; CHECK-SD-NEXT:    bl cosf
2141; CHECK-SD-NEXT:    fcvt h0, s0
2142; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2143; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2144; CHECK-SD-NEXT:    fcvt s0, h0
2145; CHECK-SD-NEXT:    bl cosf
2146; CHECK-SD-NEXT:    fcvt h0, s0
2147; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2148; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
2149; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2150; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2151; CHECK-SD-NEXT:    mov h0, v0.h[2]
2152; CHECK-SD-NEXT:    fcvt s0, h0
2153; CHECK-SD-NEXT:    bl cosf
2154; CHECK-SD-NEXT:    fcvt h0, s0
2155; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2156; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
2157; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2158; CHECK-SD-NEXT:    mov h0, v0.h[3]
2159; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2160; CHECK-SD-NEXT:    fcvt s0, h0
2161; CHECK-SD-NEXT:    bl cosf
2162; CHECK-SD-NEXT:    fcvt h0, s0
2163; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2164; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
2165; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2166; CHECK-SD-NEXT:    mov h0, v0.h[4]
2167; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2168; CHECK-SD-NEXT:    fcvt s0, h0
2169; CHECK-SD-NEXT:    bl cosf
2170; CHECK-SD-NEXT:    fcvt h0, s0
2171; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2172; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
2173; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2174; CHECK-SD-NEXT:    mov h0, v0.h[5]
2175; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2176; CHECK-SD-NEXT:    fcvt s0, h0
2177; CHECK-SD-NEXT:    bl cosf
2178; CHECK-SD-NEXT:    fcvt h0, s0
2179; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2180; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
2181; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2182; CHECK-SD-NEXT:    mov h0, v0.h[6]
2183; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2184; CHECK-SD-NEXT:    fcvt s0, h0
2185; CHECK-SD-NEXT:    bl cosf
2186; CHECK-SD-NEXT:    fcvt h0, s0
2187; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2188; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
2189; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2190; CHECK-SD-NEXT:    mov h0, v0.h[7]
2191; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2192; CHECK-SD-NEXT:    fcvt s0, h0
2193; CHECK-SD-NEXT:    bl cosf
2194; CHECK-SD-NEXT:    fcvt h1, s0
2195; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2196; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
2197; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
2198; CHECK-SD-NEXT:    add sp, sp, #48
2199; CHECK-SD-NEXT:    ret
2200;
2201; CHECK-GI-LABEL: cos_v8f16:
2202; CHECK-GI:       // %bb.0: // %entry
2203; CHECK-GI-NEXT:    sub sp, sp, #176
2204; CHECK-GI-NEXT:    str d14, [sp, #112] // 8-byte Folded Spill
2205; CHECK-GI-NEXT:    stp d13, d12, [sp, #120] // 16-byte Folded Spill
2206; CHECK-GI-NEXT:    stp d11, d10, [sp, #136] // 16-byte Folded Spill
2207; CHECK-GI-NEXT:    stp d9, d8, [sp, #152] // 16-byte Folded Spill
2208; CHECK-GI-NEXT:    str x30, [sp, #168] // 8-byte Folded Spill
2209; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
2210; CHECK-GI-NEXT:    .cfi_offset w30, -8
2211; CHECK-GI-NEXT:    .cfi_offset b8, -16
2212; CHECK-GI-NEXT:    .cfi_offset b9, -24
2213; CHECK-GI-NEXT:    .cfi_offset b10, -32
2214; CHECK-GI-NEXT:    .cfi_offset b11, -40
2215; CHECK-GI-NEXT:    .cfi_offset b12, -48
2216; CHECK-GI-NEXT:    .cfi_offset b13, -56
2217; CHECK-GI-NEXT:    .cfi_offset b14, -64
2218; CHECK-GI-NEXT:    mov h8, v0.h[1]
2219; CHECK-GI-NEXT:    mov h9, v0.h[2]
2220; CHECK-GI-NEXT:    mov h10, v0.h[3]
2221; CHECK-GI-NEXT:    mov h11, v0.h[4]
2222; CHECK-GI-NEXT:    mov h12, v0.h[5]
2223; CHECK-GI-NEXT:    mov h13, v0.h[6]
2224; CHECK-GI-NEXT:    mov h14, v0.h[7]
2225; CHECK-GI-NEXT:    fcvt s0, h0
2226; CHECK-GI-NEXT:    bl cosf
2227; CHECK-GI-NEXT:    fcvt s1, h8
2228; CHECK-GI-NEXT:    fcvt h0, s0
2229; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
2230; CHECK-GI-NEXT:    fmov s0, s1
2231; CHECK-GI-NEXT:    bl cosf
2232; CHECK-GI-NEXT:    fcvt s1, h9
2233; CHECK-GI-NEXT:    fcvt h0, s0
2234; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
2235; CHECK-GI-NEXT:    fmov s0, s1
2236; CHECK-GI-NEXT:    bl cosf
2237; CHECK-GI-NEXT:    fcvt s1, h10
2238; CHECK-GI-NEXT:    fcvt h0, s0
2239; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
2240; CHECK-GI-NEXT:    fmov s0, s1
2241; CHECK-GI-NEXT:    bl cosf
2242; CHECK-GI-NEXT:    fcvt s1, h11
2243; CHECK-GI-NEXT:    fcvt h0, s0
2244; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
2245; CHECK-GI-NEXT:    fmov s0, s1
2246; CHECK-GI-NEXT:    bl cosf
2247; CHECK-GI-NEXT:    fcvt s1, h12
2248; CHECK-GI-NEXT:    fcvt h0, s0
2249; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2250; CHECK-GI-NEXT:    fmov s0, s1
2251; CHECK-GI-NEXT:    bl cosf
2252; CHECK-GI-NEXT:    fcvt s1, h13
2253; CHECK-GI-NEXT:    fcvt h0, s0
2254; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2255; CHECK-GI-NEXT:    fmov s0, s1
2256; CHECK-GI-NEXT:    bl cosf
2257; CHECK-GI-NEXT:    fcvt s1, h14
2258; CHECK-GI-NEXT:    fcvt h0, s0
2259; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
2260; CHECK-GI-NEXT:    fmov s0, s1
2261; CHECK-GI-NEXT:    bl cosf
2262; CHECK-GI-NEXT:    ldp q3, q2, [sp, #64] // 32-byte Folded Reload
2263; CHECK-GI-NEXT:    fcvt h0, s0
2264; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
2265; CHECK-GI-NEXT:    ldp d9, d8, [sp, #152] // 16-byte Folded Reload
2266; CHECK-GI-NEXT:    ldp d11, d10, [sp, #136] // 16-byte Folded Reload
2267; CHECK-GI-NEXT:    ldr x30, [sp, #168] // 8-byte Folded Reload
2268; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
2269; CHECK-GI-NEXT:    ldr d14, [sp, #112] // 8-byte Folded Reload
2270; CHECK-GI-NEXT:    ldp d13, d12, [sp, #120] // 16-byte Folded Reload
2271; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
2272; CHECK-GI-NEXT:    ldp q2, q3, [sp, #32] // 32-byte Folded Reload
2273; CHECK-GI-NEXT:    mov v1.h[3], v3.h[0]
2274; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
2275; CHECK-GI-NEXT:    ldp q2, q3, [sp] // 32-byte Folded Reload
2276; CHECK-GI-NEXT:    mov v1.h[5], v3.h[0]
2277; CHECK-GI-NEXT:    mov v1.h[6], v2.h[0]
2278; CHECK-GI-NEXT:    mov v1.h[7], v0.h[0]
2279; CHECK-GI-NEXT:    mov v0.16b, v1.16b
2280; CHECK-GI-NEXT:    add sp, sp, #176
2281; CHECK-GI-NEXT:    ret
2282entry:
2283  %c = call <8 x half> @llvm.cos.v8f16(<8 x half> %a)
2284  ret <8 x half> %c
2285}
2286
; Lowering of llvm.cos on <16 x half>, which arrives split across q0 and q1.
; The expected code is scalarized into sixteen cosf calls, each lane being
; widened to float by fcvt before the call and narrowed back to half afterwards.
;  - SelectionDAG output (first RUN line) spills both input registers with one
;    stp, then processes each 8-lane register in turn, rebuilding the result
;    in a stack slot that is reloaded and re-spilled around every call.
;  - GlobalISel output (second RUN line) keeps lanes 1-7 of the first register
;    and lane 1 of the second in callee-saved d8-d15, spills lanes 2-7 of the
;    second register as 2-byte stack slots, and assembles both result
;    registers after the last call.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2287define <16 x half> @cos_v16f16(<16 x half> %a) {
2288; CHECK-SD-LABEL: cos_v16f16:
2289; CHECK-SD:       // %bb.0: // %entry
2290; CHECK-SD-NEXT:    sub sp, sp, #64
2291; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
2292; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
2293; CHECK-SD-NEXT:    .cfi_offset w30, -16
2294; CHECK-SD-NEXT:    stp q1, q0, [sp] // 32-byte Folded Spill
2295; CHECK-SD-NEXT:    mov h1, v0.h[1]
2296; CHECK-SD-NEXT:    fcvt s0, h1
2297; CHECK-SD-NEXT:    bl cosf
2298; CHECK-SD-NEXT:    fcvt h0, s0
2299; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2300; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2301; CHECK-SD-NEXT:    fcvt s0, h0
2302; CHECK-SD-NEXT:    bl cosf
2303; CHECK-SD-NEXT:    fcvt h0, s0
2304; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2305; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
2306; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2307; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2308; CHECK-SD-NEXT:    mov h0, v0.h[2]
2309; CHECK-SD-NEXT:    fcvt s0, h0
2310; CHECK-SD-NEXT:    bl cosf
2311; CHECK-SD-NEXT:    fcvt h0, s0
2312; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2313; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
2314; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2315; CHECK-SD-NEXT:    mov h0, v0.h[3]
2316; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2317; CHECK-SD-NEXT:    fcvt s0, h0
2318; CHECK-SD-NEXT:    bl cosf
2319; CHECK-SD-NEXT:    fcvt h0, s0
2320; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2321; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
2322; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2323; CHECK-SD-NEXT:    mov h0, v0.h[4]
2324; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2325; CHECK-SD-NEXT:    fcvt s0, h0
2326; CHECK-SD-NEXT:    bl cosf
2327; CHECK-SD-NEXT:    fcvt h0, s0
2328; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2329; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
2330; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2331; CHECK-SD-NEXT:    mov h0, v0.h[5]
2332; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2333; CHECK-SD-NEXT:    fcvt s0, h0
2334; CHECK-SD-NEXT:    bl cosf
2335; CHECK-SD-NEXT:    fcvt h0, s0
2336; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2337; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
2338; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2339; CHECK-SD-NEXT:    mov h0, v0.h[6]
2340; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2341; CHECK-SD-NEXT:    fcvt s0, h0
2342; CHECK-SD-NEXT:    bl cosf
2343; CHECK-SD-NEXT:    fcvt h0, s0
2344; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2345; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
2346; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2347; CHECK-SD-NEXT:    mov h0, v0.h[7]
2348; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2349; CHECK-SD-NEXT:    fcvt s0, h0
2350; CHECK-SD-NEXT:    bl cosf
2351; CHECK-SD-NEXT:    fcvt h0, s0
2352; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
2353; CHECK-SD-NEXT:    mov v1.h[7], v0.h[0]
2354; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2355; CHECK-SD-NEXT:    mov h0, v0.h[1]
2356; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
2357; CHECK-SD-NEXT:    fcvt s0, h0
2358; CHECK-SD-NEXT:    bl cosf
2359; CHECK-SD-NEXT:    fcvt h0, s0
2360; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2361; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2362; CHECK-SD-NEXT:    fcvt s0, h0
2363; CHECK-SD-NEXT:    bl cosf
2364; CHECK-SD-NEXT:    fcvt h0, s0
2365; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2366; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
2367; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2368; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2369; CHECK-SD-NEXT:    mov h0, v0.h[2]
2370; CHECK-SD-NEXT:    fcvt s0, h0
2371; CHECK-SD-NEXT:    bl cosf
2372; CHECK-SD-NEXT:    fcvt h0, s0
2373; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2374; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
2375; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2376; CHECK-SD-NEXT:    mov h0, v0.h[3]
2377; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2378; CHECK-SD-NEXT:    fcvt s0, h0
2379; CHECK-SD-NEXT:    bl cosf
2380; CHECK-SD-NEXT:    fcvt h0, s0
2381; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2382; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
2383; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2384; CHECK-SD-NEXT:    mov h0, v0.h[4]
2385; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2386; CHECK-SD-NEXT:    fcvt s0, h0
2387; CHECK-SD-NEXT:    bl cosf
2388; CHECK-SD-NEXT:    fcvt h0, s0
2389; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2390; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
2391; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2392; CHECK-SD-NEXT:    mov h0, v0.h[5]
2393; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2394; CHECK-SD-NEXT:    fcvt s0, h0
2395; CHECK-SD-NEXT:    bl cosf
2396; CHECK-SD-NEXT:    fcvt h0, s0
2397; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2398; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
2399; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2400; CHECK-SD-NEXT:    mov h0, v0.h[6]
2401; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2402; CHECK-SD-NEXT:    fcvt s0, h0
2403; CHECK-SD-NEXT:    bl cosf
2404; CHECK-SD-NEXT:    fcvt h0, s0
2405; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
2406; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
2407; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2408; CHECK-SD-NEXT:    mov h0, v0.h[7]
2409; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2410; CHECK-SD-NEXT:    fcvt s0, h0
2411; CHECK-SD-NEXT:    bl cosf
2412; CHECK-SD-NEXT:    fmov s1, s0
2413; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
2414; CHECK-SD-NEXT:    fcvt h2, s1
2415; CHECK-SD-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
2416; CHECK-SD-NEXT:    mov v1.h[7], v2.h[0]
2417; CHECK-SD-NEXT:    add sp, sp, #64
2418; CHECK-SD-NEXT:    ret
2419;
2420; CHECK-GI-LABEL: cos_v16f16:
2421; CHECK-GI:       // %bb.0: // %entry
2422; CHECK-GI-NEXT:    sub sp, sp, #320
2423; CHECK-GI-NEXT:    stp d15, d14, [sp, #240] // 16-byte Folded Spill
2424; CHECK-GI-NEXT:    stp d13, d12, [sp, #256] // 16-byte Folded Spill
2425; CHECK-GI-NEXT:    stp d11, d10, [sp, #272] // 16-byte Folded Spill
2426; CHECK-GI-NEXT:    stp d9, d8, [sp, #288] // 16-byte Folded Spill
2427; CHECK-GI-NEXT:    stp x29, x30, [sp, #304] // 16-byte Folded Spill
2428; CHECK-GI-NEXT:    .cfi_def_cfa_offset 320
2429; CHECK-GI-NEXT:    .cfi_offset w30, -8
2430; CHECK-GI-NEXT:    .cfi_offset w29, -16
2431; CHECK-GI-NEXT:    .cfi_offset b8, -24
2432; CHECK-GI-NEXT:    .cfi_offset b9, -32
2433; CHECK-GI-NEXT:    .cfi_offset b10, -40
2434; CHECK-GI-NEXT:    .cfi_offset b11, -48
2435; CHECK-GI-NEXT:    .cfi_offset b12, -56
2436; CHECK-GI-NEXT:    .cfi_offset b13, -64
2437; CHECK-GI-NEXT:    .cfi_offset b14, -72
2438; CHECK-GI-NEXT:    .cfi_offset b15, -80
2439; CHECK-GI-NEXT:    mov v2.16b, v1.16b
2440; CHECK-GI-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
2441; CHECK-GI-NEXT:    mov h14, v1.h[1]
2442; CHECK-GI-NEXT:    mov h1, v1.h[2]
2443; CHECK-GI-NEXT:    mov h15, v0.h[1]
2444; CHECK-GI-NEXT:    mov h8, v0.h[2]
2445; CHECK-GI-NEXT:    mov h9, v0.h[3]
2446; CHECK-GI-NEXT:    mov h10, v0.h[4]
2447; CHECK-GI-NEXT:    mov h11, v0.h[5]
2448; CHECK-GI-NEXT:    mov h12, v0.h[6]
2449; CHECK-GI-NEXT:    mov h13, v0.h[7]
2450; CHECK-GI-NEXT:    fcvt s0, h0
2451; CHECK-GI-NEXT:    str h1, [sp, #16] // 2-byte Folded Spill
2452; CHECK-GI-NEXT:    mov h1, v2.h[3]
2453; CHECK-GI-NEXT:    str h1, [sp, #32] // 2-byte Folded Spill
2454; CHECK-GI-NEXT:    mov h1, v2.h[4]
2455; CHECK-GI-NEXT:    str h1, [sp, #48] // 2-byte Folded Spill
2456; CHECK-GI-NEXT:    mov h1, v2.h[5]
2457; CHECK-GI-NEXT:    str h1, [sp, #64] // 2-byte Folded Spill
2458; CHECK-GI-NEXT:    mov h1, v2.h[6]
2459; CHECK-GI-NEXT:    str h1, [sp, #96] // 2-byte Folded Spill
2460; CHECK-GI-NEXT:    mov h1, v2.h[7]
2461; CHECK-GI-NEXT:    str h1, [sp, #160] // 2-byte Folded Spill
2462; CHECK-GI-NEXT:    bl cosf
2463; CHECK-GI-NEXT:    fcvt s1, h15
2464; CHECK-GI-NEXT:    fcvt h0, s0
2465; CHECK-GI-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
2466; CHECK-GI-NEXT:    fmov s0, s1
2467; CHECK-GI-NEXT:    bl cosf
2468; CHECK-GI-NEXT:    fcvt s1, h8
2469; CHECK-GI-NEXT:    fcvt h0, s0
2470; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
2471; CHECK-GI-NEXT:    fmov s0, s1
2472; CHECK-GI-NEXT:    bl cosf
2473; CHECK-GI-NEXT:    fcvt s1, h9
2474; CHECK-GI-NEXT:    fcvt h0, s0
2475; CHECK-GI-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
2476; CHECK-GI-NEXT:    fmov s0, s1
2477; CHECK-GI-NEXT:    bl cosf
2478; CHECK-GI-NEXT:    fcvt s1, h10
2479; CHECK-GI-NEXT:    fcvt h0, s0
2480; CHECK-GI-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
2481; CHECK-GI-NEXT:    fmov s0, s1
2482; CHECK-GI-NEXT:    bl cosf
2483; CHECK-GI-NEXT:    fcvt s1, h11
2484; CHECK-GI-NEXT:    fcvt h0, s0
2485; CHECK-GI-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
2486; CHECK-GI-NEXT:    fmov s0, s1
2487; CHECK-GI-NEXT:    bl cosf
2488; CHECK-GI-NEXT:    fcvt s1, h12
2489; CHECK-GI-NEXT:    fcvt h0, s0
2490; CHECK-GI-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
2491; CHECK-GI-NEXT:    fmov s0, s1
2492; CHECK-GI-NEXT:    bl cosf
2493; CHECK-GI-NEXT:    fcvt s1, h13
2494; CHECK-GI-NEXT:    fcvt h0, s0
2495; CHECK-GI-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
2496; CHECK-GI-NEXT:    fmov s0, s1
2497; CHECK-GI-NEXT:    bl cosf
2498; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
2499; CHECK-GI-NEXT:    fcvt h0, s0
2500; CHECK-GI-NEXT:    fcvt s1, h1
2501; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
2502; CHECK-GI-NEXT:    fmov s0, s1
2503; CHECK-GI-NEXT:    bl cosf
2504; CHECK-GI-NEXT:    fcvt s1, h14
2505; CHECK-GI-NEXT:    fcvt h0, s0
2506; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
2507; CHECK-GI-NEXT:    fmov s0, s1
2508; CHECK-GI-NEXT:    bl cosf
2509; CHECK-GI-NEXT:    ldr h1, [sp, #16] // 2-byte Folded Reload
2510; CHECK-GI-NEXT:    fcvt h0, s0
2511; CHECK-GI-NEXT:    fcvt s1, h1
2512; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
2513; CHECK-GI-NEXT:    fmov s0, s1
2514; CHECK-GI-NEXT:    bl cosf
2515; CHECK-GI-NEXT:    ldr h1, [sp, #32] // 2-byte Folded Reload
2516; CHECK-GI-NEXT:    fcvt h0, s0
2517; CHECK-GI-NEXT:    fcvt s1, h1
2518; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
2519; CHECK-GI-NEXT:    fmov s0, s1
2520; CHECK-GI-NEXT:    bl cosf
2521; CHECK-GI-NEXT:    ldr h1, [sp, #48] // 2-byte Folded Reload
2522; CHECK-GI-NEXT:    fcvt h0, s0
2523; CHECK-GI-NEXT:    fcvt s1, h1
2524; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
2525; CHECK-GI-NEXT:    fmov s0, s1
2526; CHECK-GI-NEXT:    bl cosf
2527; CHECK-GI-NEXT:    ldr h1, [sp, #64] // 2-byte Folded Reload
2528; CHECK-GI-NEXT:    fcvt h0, s0
2529; CHECK-GI-NEXT:    fcvt s1, h1
2530; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
2531; CHECK-GI-NEXT:    fmov s0, s1
2532; CHECK-GI-NEXT:    bl cosf
2533; CHECK-GI-NEXT:    ldr h1, [sp, #96] // 2-byte Folded Reload
2534; CHECK-GI-NEXT:    fcvt h0, s0
2535; CHECK-GI-NEXT:    fcvt s1, h1
2536; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
2537; CHECK-GI-NEXT:    fmov s0, s1
2538; CHECK-GI-NEXT:    bl cosf
2539; CHECK-GI-NEXT:    ldr h1, [sp, #160] // 2-byte Folded Reload
2540; CHECK-GI-NEXT:    fcvt h0, s0
2541; CHECK-GI-NEXT:    fcvt s1, h1
2542; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
2543; CHECK-GI-NEXT:    fmov s0, s1
2544; CHECK-GI-NEXT:    bl cosf
2545; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
2546; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
2547; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
2548; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
2549; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
2550; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
2551; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
2552; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
2553; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
2554; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
2555; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
2556; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
2557; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
2558; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
2559; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
2560; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
2561; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
2562; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]
2563; CHECK-GI-NEXT:    ldr q2, [sp, #176] // 16-byte Folded Reload
2564; CHECK-GI-NEXT:    mov v3.h[4], v2.h[0]
2565; CHECK-GI-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
2566; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
2567; CHECK-GI-NEXT:    ldr q2, [sp, #144] // 16-byte Folded Reload
2568; CHECK-GI-NEXT:    mov v3.h[5], v2.h[0]
2569; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
2570; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
2571; CHECK-GI-NEXT:    fcvt h2, s0
2572; CHECK-GI-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
2573; CHECK-GI-NEXT:    mov v3.h[6], v0.h[0]
2574; CHECK-GI-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
2575; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
2576; CHECK-GI-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
2577; CHECK-GI-NEXT:    mov v3.h[7], v0.h[0]
2578; CHECK-GI-NEXT:    mov v1.h[7], v2.h[0]
2579; CHECK-GI-NEXT:    mov v0.16b, v3.16b
2580; CHECK-GI-NEXT:    add sp, sp, #320
2581; CHECK-GI-NEXT:    ret
2582entry:
2583  %c = call <16 x half> @llvm.cos.v16f16(<16 x half> %a)
2584  ret <16 x half> %c
2585}
2586
; Lowering of llvm.cos on <2 x fp128>. One shared set of assertions covers both
; RUN lines: each element is passed to cosl in turn, with the second element
; (q1) spilled across the first call and the first result spilled across the
; second call before the two results are returned in q0 and q1.
2587define <2 x fp128> @cos_v2fp128(<2 x fp128> %a) {
2588; CHECK-LABEL: cos_v2fp128:
2589; CHECK:       // %bb.0: // %entry
2590; CHECK-NEXT:    sub sp, sp, #48
2591; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
2592; CHECK-NEXT:    .cfi_def_cfa_offset 48
2593; CHECK-NEXT:    .cfi_offset w30, -16
2594; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
2595; CHECK-NEXT:    bl cosl
2596; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
2597; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
2598; CHECK-NEXT:    bl cosl
2599; CHECK-NEXT:    mov v1.16b, v0.16b
2600; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
2601; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
2602; CHECK-NEXT:    add sp, sp, #48
2603; CHECK-NEXT:    ret
2604entry:
2605  %c = call <2 x fp128> @llvm.cos.v2fp128(<2 x fp128> %a)
2606  ret <2 x fp128> %c
2607}
2608
2609; This tests that we do not produce an incorrect tail-call lowering.
; Although the IR call to llvm.sin.f64 is marked `tail`, its double result is
; bitcast to i64 before being returned, so the result has to be moved from d0
; to x0 after the call. The expected code therefore saves and restores x30
; around `bl sin` and then does `fmov x0, d0`, instead of the bare `b sin`
; branch expected for sin_f64 at the top of this file.
2610define i64 @donttailcall(double noundef %x, double noundef %y) {
2611; CHECK-LABEL: donttailcall:
2612; CHECK:       // %bb.0: // %entry
2613; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
2614; CHECK-NEXT:    .cfi_def_cfa_offset 16
2615; CHECK-NEXT:    .cfi_offset w30, -16
2616; CHECK-NEXT:    bl sin
2617; CHECK-NEXT:    fmov x0, d0
2618; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
2619; CHECK-NEXT:    ret
2620entry:
2621  %call = tail call double @llvm.sin.f64(double noundef %x)
2622  %0 = bitcast double %call to i64
2623  ret i64 %0
2624}
2625
2626
; Declarations of the llvm.sin and llvm.cos intrinsics, one sin/cos pair per
; half, float, double and fp128 scalar or vector type listed here.
2627declare <16 x half> @llvm.cos.v16f16(<16 x half>)
2628declare <16 x half> @llvm.sin.v16f16(<16 x half>)
2629declare <2 x double> @llvm.cos.v2f64(<2 x double>)
2630declare <2 x double> @llvm.sin.v2f64(<2 x double>)
2631declare <2 x float> @llvm.cos.v2f32(<2 x float>)
2632declare <2 x float> @llvm.sin.v2f32(<2 x float>)
2633declare <2 x fp128> @llvm.cos.v2fp128(<2 x fp128>)
2634declare <2 x fp128> @llvm.sin.v2fp128(<2 x fp128>)
2635declare <3 x double> @llvm.cos.v3f64(<3 x double>)
2636declare <3 x double> @llvm.sin.v3f64(<3 x double>)
2637declare <3 x float> @llvm.cos.v3f32(<3 x float>)
2638declare <3 x float> @llvm.sin.v3f32(<3 x float>)
2639declare <4 x double> @llvm.cos.v4f64(<4 x double>)
2640declare <4 x double> @llvm.sin.v4f64(<4 x double>)
2641declare <4 x float> @llvm.cos.v4f32(<4 x float>)
2642declare <4 x float> @llvm.sin.v4f32(<4 x float>)
2643declare <4 x half> @llvm.cos.v4f16(<4 x half>)
2644declare <4 x half> @llvm.sin.v4f16(<4 x half>)
2645declare <7 x half> @llvm.cos.v7f16(<7 x half>)
2646declare <7 x half> @llvm.sin.v7f16(<7 x half>)
2647declare <8 x float> @llvm.cos.v8f32(<8 x float>)
2648declare <8 x float> @llvm.sin.v8f32(<8 x float>)
2649declare <8 x half> @llvm.cos.v8f16(<8 x half>)
2650declare <8 x half> @llvm.sin.v8f16(<8 x half>)
2651declare double @llvm.cos.f64(double)
2652declare double @llvm.sin.f64(double)
2653declare float @llvm.cos.f32(float)
2654declare float @llvm.sin.f32(float)
2655declare fp128 @llvm.cos.fp128(fp128)
2656declare fp128 @llvm.sin.fp128(fp128)
2657declare half @llvm.cos.f16(half)
2658declare half @llvm.sin.f16(half)
2659