; xref: /llvm-project/llvm/test/CodeGen/X86/vectorcall.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; Test integer arguments.
; No args: the vectorcall decoration counts 0 stack bytes ("@@0"), and the
; zero return is materialized with the xor idiom in %eax.
define x86_vectorcallcc i32 @test_int_1() {
; CHECK-LABEL: {{^}}test_int_1@@0:
; CHECK: xorl %eax, %eax
  ret i32 0
}
11
; A single inreg i32 arrives in %ecx on both targets; note the decoration
; differs per target (@@4 on x86 vs @@8 on x64, per the labels below).
define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
; X86-LABEL: {{^}}test_int_2@@4:
; X64-LABEL: {{^}}test_int_2@@8:
; CHECK: movl %ecx, %eax
  ret i32 %a
}
18
; Truncate an inreg i64 and return the low 32 bits.
define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
; X86-LABEL: {{^}}test_int_3@@8:
; X64-LABEL: {{^}}test_int_3@@8:
; X86: movl %ecx, %eax
; X64: movq %rcx, %rax
  %low = trunc i64 %a to i32
  ret i32 %low
}
27
; Two inreg i32 args land in %ecx/%edx; the sum is formed with an LEA.
define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
; X86-LABEL: {{^}}test_int_4@@8:
; X86: leal (%ecx,%edx), %eax
; X64-LABEL: {{^}}test_int_4@@16:
; X64: leal (%rcx,%rdx), %eax
  %sum = add i32 %a, %b
  ret i32 %sum
}
36
; The "\01" prefix on the symbol suppresses name mangling (LLVM LangRef), so
; the emitted label carries no "@@<bytes>" vectorcall decoration.
define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
; CHECK-LABEL: {{^}}test_int_5:
  ret i32 0
}
41
; Two double args: returning the second forces a copy from %xmm1 into the
; %xmm0 return register.
define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
; CHECK-LABEL: {{^}}test_fp_1@@16:
; CHECK: movaps %xmm1, %xmm0
  ret double %b
}
47
; Seven double args: the checks show the seventh (%r) being loaded from the
; stack rather than arriving in an XMM register.
define x86_vectorcallcc double @test_fp_2(double, double, double, double, double, double, double %r) {
; CHECK-LABEL: {{^}}test_fp_2@@56:
; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
  ret double %r
}
53
; Four doubles come back in xmm0-xmm3, each zeroed with the xorps idiom.
define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
; CHECK-LABEL: {{^}}test_fp_3@@0:
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3
  ret {double, double, double, double} { double 0.0, double 0.0, double 0.0, double 0.0 }
}
63
; FIXME: Returning via x87 isn't compatible, but it's hard to structure the
; tablegen any other way.
; Five doubles: only four fit in xmm0-xmm3; the checks show the fifth being
; produced on the x87 stack (fldz) -- see the FIXME above about compatibility.
define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
; CHECK-LABEL: {{^}}test_fp_4@@0:
; CHECK: fldz
; CHECK: xorps %xmm0
; CHECK: xorps %xmm1
; CHECK: xorps %xmm2
; CHECK: xorps %xmm3
  ret {double, double, double, double, double}
        { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
}
76
; Two vector args: returning the second copies %xmm1 into the %xmm0 return reg.
define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: {{^}}test_vec_1@@32:
; CHECK: movaps %xmm1, %xmm0
  ret <16 x i8> %b
}
82
; A double plus six vectors precede %r, so no XMM register is left for it; the
; checks show %r being loaded through a pointer (x64 first fetches that
; pointer from the stack).
define x86_vectorcallcc <16 x i8> @test_vec_2(double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
; CHECK-LABEL: {{^}}test_vec_2@@104:
; X64:           movq    {{[0-9]*}}(%rsp), %rax
; CHECK:         movaps (%{{rax|ecx}}), %xmm0
  ret <16 x i8> %r
}
89
; Homogeneous Vector Aggregate (HVA) types: 5, 4, 3, and 2 members of <4 x float>.
%struct.HVA5 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA4 = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
%struct.HVA3 = type { <4 x float>, <4 x float>, <4 x float> }
%struct.HVA2 = type { <4 x float>, <4 x float> }
94
; Spill the by-value HVA4 to a stack slot, then return its second element.
define x86_vectorcallcc <4 x float> @test_mixed_1(i32 %a, %struct.HVA4 inreg %bb, i32 %c) {
; CHECK-LABEL: test_mixed_1
; CHECK:       movaps	%xmm1, 16(%{{(e|r)}}sp)
; CHECK:       movaps	%xmm1, %xmm0
; CHECK:       ret{{q|l}}
entry:
  %hva.mem = alloca %struct.HVA4, align 16
  store %struct.HVA4 %bb, ptr %hva.mem, align 16
  %elt1.ptr = getelementptr inbounds %struct.HVA4, ptr %hva.mem, i32 0, i32 1
  %elt1 = load <4 x float>, ptr %elt1.ptr, align 16
  ret <4 x float> %elt1
}
107
; Round-trip the trailing vector arg %c through a stack slot and return it.
define x86_vectorcallcc <4 x float> @test_mixed_2(%struct.HVA4 inreg %a, ptr %b, <4 x float> %c) {
; CHECK-LABEL: test_mixed_2
; X86:         movaps  %xmm0, (%esp)
; X64:         movaps  %xmm2, %xmm0
; CHECK:       ret{{[ql]}}
entry:
  %spill = alloca <4 x float>, align 16
  store <4 x float> %c, ptr %spill, align 16
  %reloaded = load <4 x float>, ptr %spill, align 16
  ret <4 x float> %reloaded
}
119
; After five vector args, the trailing pointer %f arrives in a GPR and the
; result is loaded directly through it.
define x86_vectorcallcc <4 x float> @test_mixed_3(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, ptr %f) {
; CHECK-LABEL: test_mixed_3
; CHECK:       movaps	(%{{[re][ac]}}x), %xmm0
; CHECK:       ret{{[ql]}}
entry:
  %0 = load <4 x float>, ptr %f, align 16
  ret <4 x float> %0
}
128
; Load the vector 16 bytes into the buffer that %bb points at.
; (The GEP goes through HVA2 purely to form the byte-16 offset.)
define x86_vectorcallcc <4 x float> @test_mixed_4(%struct.HVA4 inreg %a, ptr %bb, <4 x float> %c) {
; CHECK-LABEL: test_mixed_4
; X86:         movaps	16(%eax), %xmm0
; X64:         movaps	16(%rdx), %xmm0
; CHECK:       ret{{[ql]}}
entry:
  %second.ptr = getelementptr inbounds %struct.HVA2, ptr %bb, i32 0, i32 1
  %second = load <4 x float>, ptr %second.ptr, align 16
  ret <4 x float> %second
}
139
; Spill the trailing HVA2 arg %dd and return its second element.
define x86_vectorcallcc <4 x float> @test_mixed_5(%struct.HVA3 inreg %a, ptr %b, <4 x float> %c, %struct.HVA2 inreg %dd) {
; CHECK-LABEL: test_mixed_5
; CHECK-DAG:   movaps	%xmm{{[0,5]}}, 16(%{{(e|r)}}sp)
; CHECK-DAG:   movaps	%xmm5, %xmm0
; CHECK:       ret{{[ql]}}
entry:
  %dd.mem = alloca %struct.HVA2, align 16
  store %struct.HVA2 %dd, ptr %dd.mem, align 16
  %elt1.ptr = getelementptr inbounds %struct.HVA2, ptr %dd.mem, i32 0, i32 1
  %elt1 = load <4 x float>, ptr %elt1.ptr, align 16
  ret <4 x float> %elt1
}
152
; Copy 64 bytes from %b into a local and return the HVA4 by value; the checks
; show the four members coming back in xmm0-xmm3 from the stack copy.
define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, ptr %b) {
; CHECK-LABEL: test_mixed_6
; CHECK:       movaps	(%{{[re]}}sp), %xmm0
; CHECK:       movaps	16(%{{[re]}}sp), %xmm1
; CHECK:       movaps	32(%{{[re]}}sp), %xmm2
; CHECK:       movaps	48(%{{[re]}}sp), %xmm3
; CHECK:       ret{{[ql]}}
entry:
  %copy = alloca %struct.HVA4, align 16
  call void @llvm.memcpy.p0.p0.i32(ptr align 16 %copy, ptr align 16 %b, i32 64, i1 false)
  %hva = load %struct.HVA4, ptr %copy, align 16
  ret %struct.HVA4 %hva
}
166
; Memory intrinsics used by the HVA copy/zero tests.
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1)
170
; Zero an 80-byte local HVA5 and copy it into the sret buffer; the checks
; show five 16-byte stores through the sret pointer.
define x86_vectorcallcc void @test_mixed_7(ptr noalias sret(%struct.HVA5) %agg.result) {
; CHECK-LABEL: test_mixed_7@@0
; X64:         mov{{[ql]}}	%rcx, %rax
; CHECK:       movaps	%xmm{{[0-9]}}, 64(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 48(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 32(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, 16(%{{rcx|eax}})
; CHECK:       movaps	%xmm{{[0-9]}}, (%{{rcx|eax}})
; CHECK:       ret{{[ql]}}
entry:
  %zeroed = alloca %struct.HVA5, align 16
  call void @llvm.memset.p0.i64(ptr align 16 %zeroed, i8 0, i64 80, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %agg.result, ptr align 16 %zeroed, i64 80, i1 false)
  ret void
}
186
; Round-trip %f (the vector after an i32) through a stack slot and return it;
; the source register differs per target, per the X86/X64 checks.
define x86_vectorcallcc <4 x float> @test_mixed_8(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f) {
; CHECK-LABEL: test_mixed_8
; X86:         movaps	%xmm4, %xmm0
; X64:         movaps	%xmm5, %xmm0
; CHECK:       ret{{[ql]}}
entry:
  %slot = alloca <4 x float>, align 16
  store <4 x float> %f, ptr %slot, align 16
  %result = load <4 x float>, ptr %slot, align 16
  ret <4 x float> %result
}
198
; Homogeneous Float Aggregate of four doubles; the callee takes it by value
; plus a trailing double.
%struct.HFA4 = type { double, double, double, double }
declare x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 %x, double %y)
201
; The incoming HFA occupies xmm0-xmm3, but the callee expects it shifted up by
; one register: the checks show each xmmN copied to xmmN+1 before the call.
; NOTE(review): this block previously used an "X32" check prefix that is never
; defined by the RUN lines (only CHECK/X86/X64), so the directive was silently
; dead; it also misspelled the mnemonic as "movasd". Rewritten as an active
; X86 check, lenient on movaps/movapd like the X64 line.
define x86_vectorcallcc double @test_mixed_9_caller(%struct.HFA4 inreg %b) {
; CHECK-LABEL: test_mixed_9_caller
; CHECK:       movaps  %xmm3, %xmm4
; CHECK:       movaps  %xmm2, %xmm3
; CHECK:       movaps  %xmm1, %xmm2
; X86:         movap{{d|s}}  %xmm0, %xmm1
; X64:         movap{{d|s}}  %xmm5, %xmm1
; CHECK:       call{{l|q}}   test_mixed_9_callee@@40
; CHECK:       addsd   {{.*}}, %xmm0
; CHECK:       ret{{l|q}}
entry:
  %call = call x86_vectorcallcc double @test_mixed_9_callee(%struct.HFA4 inreg %b, double 3.000000e+00)
  %add = fadd double 1.000000e+00, %call
  ret double %add
}
217