; We specify -mcpu explicitly to prevent the instruction reordering that happens
; on some setups (e.g., Atom) from affecting the output.
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX

; The SysV ABI used by most Unixes (and, per the checks below, by Cygwin) on x86
; specifies that an sret pointer is callee-cleanup. However, in MSVC's cdecl
; calling convention, sret pointer arguments are caller-cleanup like normal
; arguments; the MinGW checks below expect that same caller-cleanup behavior.
; sret1: the only argument is the sret pointer, and the body stores 42 through
; it. The win32 and MinGW checks require a plain `retl` (and, for win32, no
; `popl %eax` before it); the Cygwin and Linux checks expect the callee to pop
; the sret pointer with `retl $4`.
define void @sret1(ptr sret(i8) %x) nounwind {
entry:
; WIN32-LABEL: _sret1:
; WIN32: movb $42, ({{%e[abcd]x}})
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret1:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret1:
; CYGWIN: retl $4

; LINUX-LABEL: sret1:
; LINUX: retl $4

  store i8 42, ptr %x, align 4
  ret void
}

; sret2: sret pointer followed by one ordinary i8 argument, which is stored
; through the sret pointer. The return expectations are the same as for sret1.
define void @sret2(ptr sret(i8) %x, i8 %y) nounwind {
entry:
; WIN32-LABEL: _sret2:
; WIN32: movb {{.*}}, ({{%e[abcd]x}})
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret2:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret2:
; CYGWIN: retl $4

; LINUX-LABEL: sret2:
; LINUX: retl $4

  store i8 %y, ptr %x
  ret void
}

; sret3: sret pointer followed by a second, ordinary pointer argument. Besides
; the return checks, the win32 checks capture the register used for the store
; of 42 and forbid a store of 13 through that same register.
define void @sret3(ptr sret(i8) %x, ptr %y) nounwind {
entry:
; WIN32-LABEL: _sret3:
; WIN32: movb $42, ([[REG1:%e[abcd]x]])
; WIN32-NOT: movb $13, ([[REG1]])
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret3:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret3:
; CYGWIN: retl $4

; LINUX-LABEL: sret3:
; LINUX: retl $4

  store i8 42, ptr %x
  store i8 13, ptr %y
  ret void
}

; PR15556
; sret4: the sret pointer refers to a three-i32 struct and is also marked
; noalias; only the first i32 is written. Same return expectations as sret1.
%struct.S4 = type { i32, i32, i32 }

define void @sret4(ptr noalias sret(%struct.S4) %agg.result) {
entry:
; WIN32-LABEL: _sret4:
; WIN32: movl $42, ({{%e[abcd]x}})
; WIN32-NOT: popl %eax
; WIN32: {{retl$}}

; MINGW_X86-LABEL: _sret4:
; MINGW_X86: {{retl$}}

; CYGWIN-LABEL: _sret4:
; CYGWIN: retl $4

; LINUX-LABEL: sret4:
; LINUX: retl $4

  store i32 42, ptr %agg.result, align 4
  ret void
}

%struct.S5 = type { i32 }
%class.C5 = type { i8 }

; x86_thiscallcc function taking the sret pointer first and the this pointer
; second. The label is checked for every prefix (the checked symbol is the name
; without the leading \01 and without an underscore prefix); the body is only
; checked for the win32 triple.
define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(ptr noalias sret(%struct.S5) %agg.result, ptr %this) {
entry:
  %this.addr = alloca ptr, align 4
  store ptr %this, ptr %this.addr, align 4
  %this1 = load ptr, ptr %this.addr
  store i32 42, ptr %agg.result, align 4
  ret void
; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; CYGWIN-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":

; The address of the return structure is passed as an implicit parameter.
; In the -O0 build, %eax is spilled at the beginning of the function, hence we
; should match both 4(%esp) and 8(%esp).
; WIN32: {{[48]}}(%esp), [[REG:%e[abcd]x]]
; WIN32: movl $42, ([[REG]])
; WIN32: retl $4
}

; Caller side of the thiscall function above: a stack slot of type %struct.S5
; is passed as the sret argument and the address of %c as the this pointer.
; Only the win32 triple has body checks.
define void @call_foo5() {
entry:
  %c = alloca %class.C5, align 1
  %s = alloca %struct.S5, align 4
  call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(ptr sret(%struct.S5) %s, ptr %c)
; WIN32-LABEL: {{^}}_call_foo5:
; MINGW_X86-LABEL: {{^}}_call_foo5:
; CYGWIN-LABEL: {{^}}_call_foo5:
; LINUX-LABEL: {{^}}call_foo5:


; Load the address of the result and put it onto the stack.
; The this pointer goes to ECX
; (through %ecx in the -O0 build).
; WIN32-DAG: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
; WIN32-DAG: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
; WIN32-DAG: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
; WIN32: retl
  ret void
}


; test6_f calls the external x86_thiscallcc function test6_g with a local
; %struct.test6 as the sret argument (first) and %x as the second argument.
; The win32 checks and the MinGW/Cygwin checks expect different argument
; placement, as described below. There are no body checks for Linux.
%struct.test6 = type { i32, i32, i32 }
define void @test6_f(ptr %x) nounwind {
; WIN32-LABEL: _test6_f:
; MINGW_X86-LABEL: _test6_f:
; CYGWIN-LABEL: _test6_f:
; LINUX-LABEL: test6_f:

; The %x argument is moved to %ecx. It will be the this pointer.
; WIN32-DAG: movl {{16|20}}(%esp), %ecx


; The sret pointer is (%esp)
; WIN32-DAG: {{leal 4\(%esp\)|movl %esp}}, %eax
; WIN32-DAG: {{pushl %eax|movl %eax, \(%esp\)}}

; The sret pointer is %ecx
; The %x argument is moved to (%esp). It will be the this pointer.
; MINGW_X86-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
; MINGW_X86-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; MINGW_X86-NEXT: calll _test6_g

; CYGWIN-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
; CYGWIN-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
; CYGWIN-NEXT: calll _test6_g

  %tmp = alloca %struct.test6, align 4
  call x86_thiscallcc void @test6_g(ptr sret(%struct.test6) %tmp, ptr %x)
  ret void
}
declare x86_thiscallcc void @test6_g(ptr sret(%struct.test6), ptr)

; Flipping the parameters at the IR level (this pointer first, sret pointer
; second) generates the same code as test6_f for the win32 triple.
; NOTE(review): the MinGW and Cygwin checks here differ from those in test6_f
; (there the sret pointer is placed in %ecx; here %x is).
%struct.test7 = type { i32, i32, i32 }
define void @test7_f(ptr %x) nounwind {
; WIN32-LABEL: _test7_f:
; MINGW_X86-LABEL: _test7_f:
; CYGWIN-LABEL: _test7_f:
; LINUX-LABEL: test7_f:

; The %x argument is moved to %ecx on every triple whose body is checked here
; (win32, MinGW, Cygwin). It will be the this pointer.
; WIN32: movl {{16|20}}(%esp), %ecx
; MINGW_X86: movl {{16|20}}(%esp), %ecx
; CYGWIN: movl {{16|20}}(%esp), %ecx

; The sret pointer is (%esp)
; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax
; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax
; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}

  %tmp = alloca %struct.test7, align 4
  call x86_thiscallcc void @test7_g(ptr %x, ptr sret(%struct.test7) %tmp)
  ret void
}

; Callee for test7_f: copies one i32 from %in to the sret pointer %out and then
; calls clobber_eax, so %eax has to be written again before returning.
define x86_thiscallcc void @test7_g(ptr %in, ptr sret(%struct.test7) %out) {
  %v = load i32, ptr %in
  store i32 %v, ptr %out
  call void @clobber_eax()
  ret void

; Make sure we return the second parameter in %eax.
; WIN32-LABEL: _test7_g:
; WIN32: calll _clobber_eax
; WIN32: movl {{.*}}, %eax
; WIN32: retl
}

; External function called between the sret store and the return in test7_g and
; test8_f; the checks after each call expect a fresh write to %eax.
declare void @clobber_eax()

; Test what happens if the first parameter has to be split by codegen.
; Realistically, no frontend will generate code like this, but here it is for
; completeness.
; The i64 inreg argument %a is stored through the sret pointer %out. The win32
; checks expect the pointer to be loaded from the stack, two 32-bit stores at
; offsets 4 and 0 from it (in either order), and a write to %eax after the call
; to clobber_eax and before the return.
define void @test8_f(i64 inreg %a, ptr sret(i64) %out) {
  store i64 %a, ptr %out
  call void @clobber_eax()
  ret void

; WIN32-LABEL: _test8_f:
; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
; WIN32-DAG: movl {{%e[abcd]x}}, 4(%[[out]])
; WIN32-DAG: movl {{%e[abcd]x}}, (%[[out]])
; WIN32: calll _clobber_eax
; WIN32: movl {{.*}}, %eax
; WIN32: retl
}