; xref: /llvm-project/llvm/test/CodeGen/X86/vec-libcalls.ll (revision 378fe2fc23fa56181577d411fe6d51fa531cd860)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s

; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527

; Use an AVX target to show that the potential problem
; is not limited to 128-bit types/registers. Ie, widening
; up to 256-bits may also result in bogus libcalls.

; Use fsin as the representative test for various data types.

declare <1 x float> @llvm.sin.v1f32(<1 x float>)
declare <2 x float> @llvm.sin.v2f32(<2 x float>)
declare <3 x float> @llvm.sin.v3f32(<3 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
declare <5 x float> @llvm.sin.v5f32(<5 x float>)
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
declare <3 x double> @llvm.sin.v3f64(<3 x double>)

declare <1 x float> @llvm.tan.v1f32(<1 x float>)
declare <2 x float> @llvm.tan.v2f32(<2 x float>)
declare <3 x float> @llvm.tan.v3f32(<3 x float>)
declare <4 x float> @llvm.tan.v4f32(<4 x float>)
declare <5 x float> @llvm.tan.v5f32(<5 x float>)
declare <6 x float> @llvm.tan.v6f32(<6 x float>)
declare <3 x double> @llvm.tan.v3f64(<3 x double>)

declare <1 x float> @llvm.acos.v1f32(<1 x float>)
declare <2 x float> @llvm.acos.v2f32(<2 x float>)
declare <3 x float> @llvm.acos.v3f32(<3 x float>)
declare <4 x float> @llvm.acos.v4f32(<4 x float>)
declare <5 x float> @llvm.acos.v5f32(<5 x float>)
declare <6 x float> @llvm.acos.v6f32(<6 x float>)
declare <3 x double> @llvm.acos.v3f64(<3 x double>)

declare <1 x float> @llvm.asin.v1f32(<1 x float>)
declare <2 x float> @llvm.asin.v2f32(<2 x float>)
declare <3 x float> @llvm.asin.v3f32(<3 x float>)
declare <4 x float> @llvm.asin.v4f32(<4 x float>)
declare <5 x float> @llvm.asin.v5f32(<5 x float>)
declare <6 x float> @llvm.asin.v6f32(<6 x float>)
declare <3 x double> @llvm.asin.v3f64(<3 x double>)

declare <1 x float> @llvm.atan.v1f32(<1 x float>)
declare <2 x float> @llvm.atan.v2f32(<2 x float>)
declare <3 x float> @llvm.atan.v3f32(<3 x float>)
declare <4 x float> @llvm.atan.v4f32(<4 x float>)
declare <5 x float> @llvm.atan.v5f32(<5 x float>)
declare <6 x float> @llvm.atan.v6f32(<6 x float>)
declare <3 x double> @llvm.atan.v3f64(<3 x double>)

declare <1 x float> @llvm.cosh.v1f32(<1 x float>)
declare <2 x float> @llvm.cosh.v2f32(<2 x float>)
declare <3 x float> @llvm.cosh.v3f32(<3 x float>)
declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
declare <5 x float> @llvm.cosh.v5f32(<5 x float>)
declare <6 x float> @llvm.cosh.v6f32(<6 x float>)
declare <3 x double> @llvm.cosh.v3f64(<3 x double>)

declare <1 x float> @llvm.sinh.v1f32(<1 x float>)
declare <2 x float> @llvm.sinh.v2f32(<2 x float>)
declare <3 x float> @llvm.sinh.v3f32(<3 x float>)
declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
declare <5 x float> @llvm.sinh.v5f32(<5 x float>)
declare <6 x float> @llvm.sinh.v6f32(<6 x float>)
declare <3 x double> @llvm.sinh.v3f64(<3 x double>)

declare <1 x float> @llvm.tanh.v1f32(<1 x float>)
declare <2 x float> @llvm.tanh.v2f32(<2 x float>)
declare <3 x float> @llvm.tanh.v3f32(<3 x float>)
declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
declare <5 x float> @llvm.tanh.v5f32(<5 x float>)
declare <6 x float> @llvm.tanh.v6f32(<6 x float>)
declare <3 x double> @llvm.tanh.v3f64(<3 x double>)

; Verify that all of the potential libcall candidates are handled.
; Some of these have custom lowering, so those cases won't have
; libcalls.

declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
declare <2 x float> @llvm.cos.v2f32(<2 x float>)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <2 x float> @llvm.exp2.v2f32(<2 x float>)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <2 x float> @llvm.log.v2f32(<2 x float>)
declare <2 x float> @llvm.log10.v2f32(<2 x float>)
declare <2 x float> @llvm.log2.v2f32(<2 x float>)
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)

define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: sin_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sin_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sin_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: sin_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

define <5 x float> @sin_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: sin_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @sin_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: sin_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: sin_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sin@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: tan_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: tan_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: tan_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: tan_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: tan_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: tan_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: tan_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tan@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

define <1 x float> @acos_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: acos_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.acos.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @acos_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: acos_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.acos.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @acos_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: acos_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.acos.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @acos_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: acos_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.acos.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

define <5 x float> @acos_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: acos_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.acos.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @acos_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: acos_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq acosf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.acos.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @acos_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: acos_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acos@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq acos@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq acos@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.acos.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

define <1 x float> @asin_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: asin_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.asin.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

; <2 x float> asin scalarizes to exactly 2 asinf libcalls.
define <2 x float> @asin_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: asin_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.asin.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
705
; <3 x float> asin scalarizes to exactly 3 asinf libcalls.
define <3 x float> @asin_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: asin_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.asin.v3f32(<3 x float> %x)
  ret <3 x float> %r
}
729
; <4 x float> asin scalarizes to exactly 4 asinf libcalls.
define <4 x float> @asin_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: asin_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.asin.v4f32(<4 x float> %x)
  ret <4 x float> %r
}
759
; <5 x float> asin (widened to 256 bits) scalarizes to exactly 5 asinf
; libcalls -- no bogus vector libcall from the widening.
define <5 x float> @asin_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: asin_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.asin.v5f32(<5 x float> %x)
  ret <5 x float> %r
}
798
; <6 x float> asin (widened to 256 bits) scalarizes to exactly 6 asinf
; libcalls -- no bogus vector libcall from the widening.
define <6 x float> @asin_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: asin_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq asinf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.asin.v6f32(<6 x float> %x)
  ret <6 x float> %r
}
843
; <3 x double> asin scalarizes to exactly 3 asin libcalls.
define <3 x double> @asin_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: asin_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asin@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq asin@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq asin@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.asin.v3f64(<3 x double> %x)
  ret <3 x double> %r
}
870
; <1 x float> atan lowers to a single scalar atanf libcall.
define <1 x float> @atan_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: atan_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.atan.v1f32(<1 x float> %x)
  ret <1 x float> %r
}
881
; <2 x float> atan scalarizes to exactly 2 atanf libcalls.
define <2 x float> @atan_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: atan_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.atan.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
899
; <3 x float> atan scalarizes to exactly 3 atanf libcalls.
define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: atan_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.atan.v3f32(<3 x float> %x)
  ret <3 x float> %r
}
923
; <4 x float> atan scalarizes to exactly 4 atanf libcalls.
define <4 x float> @atan_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: atan_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.atan.v4f32(<4 x float> %x)
  ret <4 x float> %r
}
953
; <5 x float> atan (widened to 256 bits) scalarizes to exactly 5 atanf
; libcalls -- no bogus vector libcall from the widening.
define <5 x float> @atan_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: atan_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.atan.v5f32(<5 x float> %x)
  ret <5 x float> %r
}
992
; <6 x float> atan (widened to 256 bits) scalarizes to exactly 6 atanf
; libcalls -- no bogus vector libcall from the widening.
define <6 x float> @atan_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: atan_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq atanf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.atan.v6f32(<6 x float> %x)
  ret <6 x float> %r
}
1037
; <3 x double> atan scalarizes to exactly 3 atan libcalls.
define <3 x double> @atan_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: atan_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atan@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq atan@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq atan@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.atan.v3f64(<3 x double> %x)
  ret <3 x double> %r
}
1064
; <1 x float> cosh lowers to a single scalar coshf libcall.
define <1 x float> @cosh_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: cosh_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.cosh.v1f32(<1 x float> %x)
  ret <1 x float> %r
}
1075
; <2 x float> cosh scalarizes to exactly 2 coshf libcalls.
define <2 x float> @cosh_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: cosh_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.cosh.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1093
; <3 x float> cosh scalarizes to exactly 3 coshf libcalls.
define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: cosh_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.cosh.v3f32(<3 x float> %x)
  ret <3 x float> %r
}
1117
; <4 x float> cosh scalarizes to exactly 4 coshf libcalls.
define <4 x float> @cosh_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: cosh_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.cosh.v4f32(<4 x float> %x)
  ret <4 x float> %r
}
1147
; <5 x float> cosh (widened to 256 bits) scalarizes to exactly 5 coshf
; libcalls -- no bogus vector libcall from the widening.
define <5 x float> @cosh_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: cosh_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.cosh.v5f32(<5 x float> %x)
  ret <5 x float> %r
}
1186
; <6 x float> cosh (widened to 256 bits) scalarizes to exactly 6 coshf
; libcalls -- no bogus vector libcall from the widening.
define <6 x float> @cosh_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: cosh_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq coshf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.cosh.v6f32(<6 x float> %x)
  ret <6 x float> %r
}
1231
; <3 x double> cosh scalarizes to exactly 3 cosh libcalls.
define <3 x double> @cosh_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: cosh_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq cosh@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.cosh.v3f64(<3 x double> %x)
  ret <3 x double> %r
}
1258
; <1 x float> sinh lowers to a single scalar sinhf libcall.
define <1 x float> @sinh_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: sinh_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.sinh.v1f32(<1 x float> %x)
  ret <1 x float> %r
}
1269
; <2 x float> sinh scalarizes to exactly 2 sinhf libcalls.
define <2 x float> @sinh_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sinh_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.sinh.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1287
; <3 x float> sinh scalarizes to exactly 3 sinhf libcalls.
define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sinh_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.sinh.v3f32(<3 x float> %x)
  ret <3 x float> %r
}
1311
; <4 x float> sinh scalarizes to exactly 4 sinhf libcalls.
define <4 x float> @sinh_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: sinh_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.sinh.v4f32(<4 x float> %x)
  ret <4 x float> %r
}
1341
; <5 x float> sinh (widened to 256 bits) scalarizes to exactly 5 sinhf
; libcalls -- no bogus vector libcall from the widening.
define <5 x float> @sinh_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: sinh_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.sinh.v5f32(<5 x float> %x)
  ret <5 x float> %r
}
1380
; <6 x float> widens to a YMM; exactly 6 sinhf calls are emitted (upper half
; first via vextractf128, then the lower half), then the halves are rejoined
; with vinsertf128.
define <6 x float> @sinh_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: sinh_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq sinhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.sinh.v6f32(<6 x float> %x)
  ret <6 x float> %r
}
1425
; <3 x double> scalarizes to 3 calls of the double-precision sinh libcall;
; the low two lanes are rejoined with vunpcklpd, the third via vinsertf128.
define <3 x double> @sinh_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: sinh_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq sinh@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.sinh.v3f64(<3 x double> %x)
  ret <3 x double> %r
}
1452
; <1 x float> degenerates to a single scalar tanhf call.
define <1 x float> @tanh_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: tanh_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
  %r = call <1 x float> @llvm.tanh.v1f32(<1 x float> %x)
  ret <1 x float> %r
}
1463
; <2 x float> scalarizes to 2 tanhf calls — the widened XMM lanes do not
; produce extra calls.
define <2 x float> @tanh_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: tanh_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.tanh.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1481
; <3 x float> scalarizes to exactly 3 tanhf calls — no 4th call for the
; undefined padding lane.
define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: tanh_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x float> @llvm.tanh.v3f32(<3 x float> %x)
  ret <3 x float> %r
}
1505
; <4 x float> scalarizes to 4 tanhf calls; result rebuilt with vinsertps.
define <4 x float> @tanh_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: tanh_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.tanh.v4f32(<4 x float> %x)
  ret <4 x float> %r
}
1535
; <5 x float> widens to a YMM but emits only 5 tanhf calls — no bogus call
; for the padding lanes (PR38527).
define <5 x float> @tanh_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: tanh_v5f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <5 x float> @llvm.tanh.v5f32(<5 x float> %x)
  ret <5 x float> %r
}
1574
; <6 x float> widens to a YMM; exactly 6 tanhf calls are emitted and the two
; halves are rejoined with vinsertf128.
define <6 x float> @tanh_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: tanh_v6f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq tanhf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <6 x float> @llvm.tanh.v6f32(<6 x float> %x)
  ret <6 x float> %r
}
1619
; <3 x double> scalarizes to 3 calls of the double-precision tanh libcall.
define <3 x double> @tanh_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: tanh_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $72, %rsp
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanh@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq tanh@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq tanh@PLT
; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    addq $72, %rsp
; CHECK-NEXT:    retq
  %r = call <3 x double> @llvm.tanh.v3f64(<3 x double> %x)
  ret <3 x double> %r
}
1646
; fabs lowers inline to an AND with a constant-pool sign-clearing mask — no
; libcall is involved.
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1655
; ceil lowers inline to vroundps $10 (round toward +inf, inexact suppressed).
define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: ceil_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $10, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1664
; cos has no vector libcall: <2 x float> scalarizes to 2 cosf calls.
define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: cos_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq cosf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq cosf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1682
; exp scalarizes to 2 expf calls.
define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq expf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq expf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1700
; exp2 scalarizes to 2 exp2f calls.
define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq exp2f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1718
; floor lowers inline to vroundps $9 (round toward -inf, inexact suppressed).
define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: floor_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $9, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1727
; log scalarizes to 2 logf calls.
define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq logf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1745
; log10 scalarizes to 2 log10f calls.
define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log10_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq log10f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1763
; log2 scalarizes to 2 log2f calls.
define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    callq log2f@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq log2f@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    addq $40, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1781
; nearbyint lowers inline to vroundps $12 (current rounding mode, inexact
; exceptions suppressed) — no libcall.
define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: nearbyint__v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $12, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1790
; rint lowers inline to vroundps $4 (current rounding mode; unlike nearbyint,
; inexact exceptions are not suppressed).
define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: rint_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $4, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1799
; round (ties away from zero) has no direct vroundps mode, so it is expanded:
; build copysign(0.5, x) with and/or masks, add it to x, then truncate with
; vroundps $11 — still no libcall.
define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: round_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1811
; sqrt maps directly to the vsqrtps instruction — no libcall.
define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1820
; trunc lowers inline to vroundps $11 (truncate, inexact suppressed).
define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: trunc_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vroundps $11, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)
  ret <2 x float> %r
}
1829
1830