xref: /llvm-project/llvm/test/CodeGen/X86/win32_sret.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; We specify -mcpu explicitly to avoid instruction reordering that happens on
2; some setups (e.g., Atom) from affecting the output.
3; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
4; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
5; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
6; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
7; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
8; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
9; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
10; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
11
12; The SysV ABI used by most Unixes (and followed by the Cygwin and Linux checks
13; below) specifies that an sret pointer is callee-cleanup. However, in MSVC's cdecl
14; calling convention, which the MinGW checks below also expect, sret pointer arguments are caller-cleanup like normal arguments.
15
; sret1 stores the constant byte 42 through its sret pointer and returns.
; The directives below pin the return instruction per prefix: a bare `retl`
; for WIN32 and MINGW_X86, and `retl $4` for CYGWIN and LINUX.
16define void @sret1(ptr sret(i8) %x) nounwind {
17entry:
18; WIN32-LABEL:      _sret1:
19; WIN32:      movb $42, ({{%e[abcd]x}})
; No `popl %eax` may appear between the store and the return.
20; WIN32-NOT:  popl %eax
21; WIN32:    {{retl$}}
22
23; MINGW_X86-LABEL:  _sret1:
24; MINGW_X86:  {{retl$}}
25
26; CYGWIN-LABEL:     _sret1:
27; CYGWIN:     retl $4
28
29; LINUX-LABEL:      sret1:
30; LINUX:      retl $4
31
; The store that the `movb $42` pattern above is expected to match.
32  store i8 42, ptr %x, align 4
33  ret void
34}
35
; sret2 is like sret1, but the byte stored through the sret pointer comes from
; the second argument %y rather than a constant, so the WIN32 store pattern
; accepts any source operand. The expected return instructions are the same
; as for sret1: bare `retl` for WIN32 and MINGW_X86, `retl $4` for CYGWIN and
; LINUX.
36define void @sret2(ptr sret(i8) %x, i8 %y) nounwind {
37entry:
38; WIN32-LABEL:      _sret2:
39; WIN32:      movb {{.*}}, ({{%e[abcd]x}})
; No `popl %eax` may appear between the store and the return.
40; WIN32-NOT:  popl %eax
41; WIN32:    {{retl$}}
42
43; MINGW_X86-LABEL:  _sret2:
44; MINGW_X86:  {{retl$}}
45
46; CYGWIN-LABEL:     _sret2:
47; CYGWIN:     retl $4
48
49; LINUX-LABEL:      sret2:
50; LINUX:      retl $4
51
52  store i8 %y, ptr %x
53  ret void
54}
55
; sret3 takes an sret pointer %x plus an ordinary pointer %y, and stores 42
; through %x and 13 through %y. The WIN32 directives capture the register used
; for the `movb $42` store as REG1 and then require that no `movb $13` goes
; through that same register before the return, i.e. the two stores must not
; target the same address register.
56define void @sret3(ptr sret(i8) %x, ptr %y) nounwind {
57entry:
58; WIN32-LABEL:      _sret3:
59; WIN32:      movb $42, ([[REG1:%e[abcd]x]])
60; WIN32-NOT:  movb $13, ([[REG1]])
61; WIN32-NOT:  popl %eax
62; WIN32:    {{retl$}}
63
64; MINGW_X86-LABEL:  _sret3:
65; MINGW_X86:  {{retl$}}
66
67; CYGWIN-LABEL:     _sret3:
68; CYGWIN:     retl $4
69
70; LINUX-LABEL:      sret3:
71; LINUX:      retl $4
72
; Store through the sret pointer first, then through the plain pointer.
73  store i8 42, ptr %x
74  store i8 13, ptr %y
75  ret void
76}
77
78; PR15556
; sret4 returns a three-i32 aggregate through a noalias sret pointer and only
; writes an i32 42 at the start of it. Unlike sret1-sret3 it is not marked
; nounwind. The expected return instructions per prefix are unchanged: bare
; `retl` for WIN32 and MINGW_X86, `retl $4` for CYGWIN and LINUX.
79%struct.S4 = type { i32, i32, i32 }
80
81define void @sret4(ptr noalias sret(%struct.S4) %agg.result) {
82entry:
83; WIN32-LABEL:     _sret4:
84; WIN32:     movl $42, ({{%e[abcd]x}})
; No `popl %eax` may appear between the store and the return.
85; WIN32-NOT: popl %eax
86; WIN32:   {{retl$}}
87
88; MINGW_X86-LABEL: _sret4:
89; MINGW_X86: {{retl$}}
90
91; CYGWIN-LABEL:    _sret4:
92; CYGWIN:    retl $4
93
94; LINUX-LABEL:     sret4:
95; LINUX:     retl $4
96
97  store i32 42, ptr %agg.result, align 4
98  ret void
99}
100
; An x86_thiscallcc function whose first IR parameter is the sret result and
; whose second is the `this` pointer. The body spills %this to a stack slot,
; reloads it (the reloaded value is unused) and stores 42 into the result.
; All four prefixes check that the symbol is emitted with its literal name
; (the \01 marker in the IR name is not part of the emitted label); only the
; WIN32 prefix checks the body.
101%struct.S5 = type { i32 }
102%class.C5 = type { i8 }
103
104define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(ptr noalias sret(%struct.S5) %agg.result, ptr %this) {
105entry:
106  %this.addr = alloca ptr, align 4
107  store ptr %this, ptr %this.addr, align 4
108  %this1 = load ptr, ptr %this.addr
109  store i32 42, ptr %agg.result, align 4
110  ret void
111; WIN32-LABEL:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
112; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
113; CYGWIN-LABEL:    {{^}}"?foo@C5@@QAE?AUS5@@XZ":
114; LINUX-LABEL:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
115
116; The address of the return structure is passed as an implicit parameter.
117; In the -O0 build, %eax is spilled at the beginning of the function, hence we
118; should match both 4(%esp) and 8(%esp).
; The register loaded from the stack is captured as REG, must be the one the
; 42 is stored through, and the function must return with `retl $4`.
119; WIN32:     {{[48]}}(%esp), [[REG:%e[abcd]x]]
120; WIN32:     movl $42, ([[REG]])
121; WIN32:     retl $4
122}
123
; call_foo5 is the caller side of the thiscall+sret function above: it
; allocates a %class.C5 object and a %struct.S5 result slot on the stack and
; calls the method with the result slot as sret and the object as `this`.
; Every prefix checks the function label; only the WIN32 prefix checks the
; call sequence.
124define void @call_foo5() {
125entry:
126  %c = alloca %class.C5, align 1
127  %s = alloca %struct.S5, align 4
128  call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(ptr sret(%struct.S5) %s, ptr %c)
129; WIN32-LABEL:      {{^}}_call_foo5:
130; MINGW_X86-LABEL:  {{^}}_call_foo5:
131; CYGWIN-LABEL:     {{^}}_call_foo5:
132; LINUX-LABEL:      {{^}}call_foo5:
133
134
135; Load the address of the result and put it onto stack
136; The this pointer goes to ECX.
137; (through %ecx in the -O0 build).
; The three setup instructions may appear in any order, but the call must be
; the very next line after the last of them.
138; WIN32-DAG:  leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
139; WIN32-DAG:  {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
140; WIN32-DAG:  {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
141; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
142; WIN32:      retl
143  ret void
144}
145
146
; test6_f calls the external x86_thiscallcc function test6_g with a local
; %struct.test6 temporary as the sret argument (first IR parameter) and the
; incoming pointer %x as the second argument. The expectations differ per
; prefix: for WIN32 %x goes in %ecx and the sret pointer on the stack, while
; for MINGW_X86 and CYGWIN the sret pointer goes in %ecx and %x on the stack.
; The LINUX prefix only checks the label.
147%struct.test6 = type { i32, i32, i32 }
148define void @test6_f(ptr %x) nounwind {
149; WIN32-LABEL: _test6_f:
150; MINGW_X86-LABEL: _test6_f:
151; CYGWIN-LABEL: _test6_f:
152; LINUX-LABEL: test6_f:
153
154; The %x argument is moved to %ecx. It will be the this pointer.
155; WIN32-DAG: movl    {{16|20}}(%esp), %ecx
156
157
158; The sret pointer is (%esp)
159; WIN32-DAG:      {{leal 4\(%esp\)|movl %esp}}, %eax
160; WIN32-DAG:      {{pushl   %eax|movl %eax, \(%esp\)}}
161
162; The sret pointer is %ecx
163; The %x argument is moved to (%esp). It will be the this pointer.
164; MINGW_X86-DAG:  {{leal 4\(%esp\)|movl %esp}}, %ecx
165; MINGW_X86-DAG: {{pushl   16\(%esp\)|movl %eax, \(%esp\)}}
166; MINGW_X86-NEXT: calll   _test6_g
167
168; CYGWIN-DAG:  {{leal 4\(%esp\)|movl %esp}}, %ecx
169; CYGWIN-DAG:  {{pushl   16\(%esp\)|movl %eax, \(%esp\)}}
170; CYGWIN-NEXT: calll   _test6_g
171
; Stack temporary that receives the struct returned by test6_g.
172  %tmp = alloca %struct.test6, align 4
173  call x86_thiscallcc void @test6_g(ptr sret(%struct.test6) %tmp, ptr %x)
174  ret void
175}
176declare x86_thiscallcc void @test6_g(ptr sret(%struct.test6), ptr)
177
178; Flipping the parameters at the IR level generates the same code.
; test7_f mirrors test6_f, except that the callee test7_g takes the plain
; pointer first and the sret pointer second. For WIN32, MINGW_X86 and CYGWIN
; alike, %x is expected in %ecx and the sret pointer in %eax, immediately
; followed by a push or a store to (%esp). The LINUX prefix only checks the
; label.
; NOTE(review): the MINGW_X86 and CYGWIN expectations here differ from those
; in test6_f (there the sret pointer goes in %ecx), so "the same code" above
; presumably refers to the WIN32 output only - confirm.
179%struct.test7 = type { i32, i32, i32 }
180define void @test7_f(ptr %x) nounwind {
181; WIN32-LABEL: _test7_f:
182; MINGW_X86-LABEL: _test7_f:
183; CYGWIN-LABEL: _test7_f:
184; LINUX-LABEL: test7_f:
185
186; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
187; WIN32:      movl    {{16|20}}(%esp), %ecx
188; MINGW_X86:  movl    {{16|20}}(%esp), %ecx
189; CYGWIN:     movl    {{16|20}}(%esp), %ecx
190
191; The sret pointer is (%esp)
192; WIN32:      {{leal 4\(%esp\)|movl %esp}}, %eax
193; WIN32-NEXT:     {{pushl   %eax|movl %eax, \(%esp\)}}
194; MINGW_X86:      {{leal 4\(%esp\)|movl %esp}}, %eax
195; MINGW_X86-NEXT: {{pushl   %eax|movl %eax, \(%esp\)}}
196; CYGWIN:      {{leal 4\(%esp\)|movl %esp}}, %eax
197; CYGWIN-NEXT: {{pushl   %eax|movl %eax, \(%esp\)}}
198
; Stack temporary that receives the struct returned by test7_g.
199  %tmp = alloca %struct.test7, align 4
200  call x86_thiscallcc void @test7_g(ptr %x, ptr sret(%struct.test7) %tmp)
201  ret void
202}
203
; test7_g is the callee for test7_f: an x86_thiscallcc function whose sret
; pointer is the second parameter. It copies one i32 from %in to %out and then
; calls clobber_eax, so %eax has to be set again before the return. Only the
; WIN32 prefix has checks here.
203define x86_thiscallcc void @test7_g(ptr %in, ptr sret(%struct.test7) %out) {
204  %v = load i32, ptr %in
205  store i32 %v, ptr %out
206  call void @clobber_eax()
207  ret void
208
209; Make sure we return the second parameter in %eax.
; The move into %eax is required to come after the call and before the return.
210; WIN32-LABEL: _test7_g:
211; WIN32: calll _clobber_eax
212; WIN32: movl {{.*}}, %eax
213; WIN32: retl
214}
216
; External helper called by test7_g and test8_f between the store through the
; sret pointer and the return.
216declare void @clobber_eax()
217
218; Test what happens if the first parameter has to be split by codegen.
219; Realistically, no frontend will generate code like this, but here it is for
220; completeness.
; test8_f takes an `inreg` i64 first and the sret pointer second, stores the
; i64 through the sret pointer, then calls clobber_eax. Only the WIN32 prefix
; has checks here.
221define void @test8_f(i64 inreg %a, ptr sret(i64) %out) {
222  store i64 %a, ptr %out
223  call void @clobber_eax()
224  ret void
225
; Expected shape: the sret pointer is loaded from the stack into a register
; captured as `out`, the i64 is written as two 32-bit stores at offsets 4 and
; 0 (in either order), and after the call a value is moved into %eax before
; the return.
226; WIN32-LABEL: _test8_f:
227; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
228; WIN32-DAG: movl {{%e[abcd]x}}, 4(%[[out]])
229; WIN32-DAG: movl {{%e[abcd]x}}, (%[[out]])
230; WIN32: calll _clobber_eax
231; WIN32: movl {{.*}}, %eax
232; WIN32: retl
233}
235