; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Make sure that we realign the stack. Mingw32 uses 4-byte stack alignment; we
; need 16 bytes for SSE and 32 bytes for AVX.

; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s --check-prefix=NOSSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -stackrealign -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX

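; test1 zeroes a 32-byte buffer. Without SSE the memset is expanded to eight
; 4-byte movl stores through the frame pointer. With SSE the stack is
; realigned to 16 bytes (andl $-16, %esp) so two aligned movaps stores can be
; used; with AVX it is realigned to 32 bytes (andl $-32, %esp) for a single
; 32-byte vmovaps %ymm0 store. The variable-size alloca of %t forces the
; __alloca stack probe and the frame-pointer setup checked below.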
define void @test1(i32 %t) nounwind {
; NOSSE-LABEL: test1:
; NOSSE:       # %bb.0:
; NOSSE-NEXT:    pushl %ebp
; NOSSE-NEXT:    movl %esp, %ebp
; NOSSE-NEXT:    subl $32, %esp
; NOSSE-NEXT:    movl 8(%ebp), %eax
; NOSSE-NEXT:    movl $0, -4(%ebp)
; NOSSE-NEXT:    movl $0, -8(%ebp)
; NOSSE-NEXT:    movl $0, -12(%ebp)
; NOSSE-NEXT:    movl $0, -16(%ebp)
; NOSSE-NEXT:    movl $0, -20(%ebp)
; NOSSE-NEXT:    movl $0, -24(%ebp)
; NOSSE-NEXT:    movl $0, -28(%ebp)
; NOSSE-NEXT:    movl $0, -32(%ebp)
; NOSSE-NEXT:    addl $3, %eax
; NOSSE-NEXT:    andl $-4, %eax
; NOSSE-NEXT:    calll __alloca
; NOSSE-NEXT:    movl %esp, %eax
; NOSSE-NEXT:    pushl %eax
; NOSSE-NEXT:    calll _dummy
; NOSSE-NEXT:    movl %ebp, %esp
; NOSSE-NEXT:    popl %ebp
; NOSSE-NEXT:    retl
;
; SSE-LABEL: test1:
; SSE:       # %bb.0:
; SSE-NEXT:    pushl %ebp
; SSE-NEXT:    movl %esp, %ebp
; SSE-NEXT:    pushl %esi
; SSE-NEXT:    andl $-16, %esp
; SSE-NEXT:    subl $48, %esp
; SSE-NEXT:    movl %esp, %esi
; SSE-NEXT:    movl 8(%ebp), %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, 16(%esi)
; SSE-NEXT:    movaps %xmm0, (%esi)
; SSE-NEXT:    addl $3, %eax
; SSE-NEXT:    andl $-4, %eax
; SSE-NEXT:    calll __alloca
; SSE-NEXT:    movl %esp, %eax
; SSE-NEXT:    pushl %eax
; SSE-NEXT:    calll _dummy
; SSE-NEXT:    leal -4(%ebp), %esp
; SSE-NEXT:    popl %esi
; SSE-NEXT:    popl %ebp
; SSE-NEXT:    retl
;
; AVX-LABEL: test1:
; AVX:       # %bb.0:
; AVX-NEXT:    pushl %ebp
; AVX-NEXT:    movl %esp, %ebp
; AVX-NEXT:    pushl %esi
; AVX-NEXT:    andl $-32, %esp
; AVX-NEXT:    subl $64, %esp
; AVX-NEXT:    movl %esp, %esi
; AVX-NEXT:    movl 8(%ebp), %eax
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, (%esi)
; AVX-NEXT:    addl $3, %eax
; AVX-NEXT:    andl $-4, %eax
; AVX-NEXT:    calll __alloca
; AVX-NEXT:    movl %esp, %eax
; AVX-NEXT:    pushl %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    calll _dummy
; AVX-NEXT:    leal -4(%ebp), %esp
; AVX-NEXT:    popl %esi
; AVX-NEXT:    popl %ebp
; AVX-NEXT:    retl
  %tmp1210 = alloca i8, i32 32, align 4
  call void @llvm.memset.p0.i64(ptr align 4 %tmp1210, i8 0, i64 32, i1 false)
  %x = alloca i8, i32 %t
  call void @dummy(ptr %x)
  ret void
}

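; test2 is the 16-byte variant: one aligned 16-byte store suffices, so the SSE
; and AVX lowerings both realign to only 16 bytes and use a single xmm store.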
define void @test2(i32 %t) nounwind {
; NOSSE-LABEL: test2:
; NOSSE:       # %bb.0:
; NOSSE-NEXT:    pushl %ebp
; NOSSE-NEXT:    movl %esp, %ebp
; NOSSE-NEXT:    subl $16, %esp
; NOSSE-NEXT:    movl 8(%ebp), %eax
; NOSSE-NEXT:    movl $0, -4(%ebp)
; NOSSE-NEXT:    movl $0, -8(%ebp)
; NOSSE-NEXT:    movl $0, -12(%ebp)
; NOSSE-NEXT:    movl $0, -16(%ebp)
; NOSSE-NEXT:    addl $3, %eax
; NOSSE-NEXT:    andl $-4, %eax
; NOSSE-NEXT:    calll __alloca
; NOSSE-NEXT:    movl %esp, %eax
; NOSSE-NEXT:    pushl %eax
; NOSSE-NEXT:    calll _dummy
; NOSSE-NEXT:    movl %ebp, %esp
; NOSSE-NEXT:    popl %ebp
; NOSSE-NEXT:    retl
;
; SSE-LABEL: test2:
; SSE:       # %bb.0:
; SSE-NEXT:    pushl %ebp
; SSE-NEXT:    movl %esp, %ebp
; SSE-NEXT:    pushl %esi
; SSE-NEXT:    andl $-16, %esp
; SSE-NEXT:    subl $32, %esp
; SSE-NEXT:    movl %esp, %esi
; SSE-NEXT:    movl 8(%ebp), %eax
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, (%esi)
; SSE-NEXT:    addl $3, %eax
; SSE-NEXT:    andl $-4, %eax
; SSE-NEXT:    calll __alloca
; SSE-NEXT:    movl %esp, %eax
; SSE-NEXT:    pushl %eax
; SSE-NEXT:    calll _dummy
; SSE-NEXT:    leal -4(%ebp), %esp
; SSE-NEXT:    popl %esi
; SSE-NEXT:    popl %ebp
; SSE-NEXT:    retl
;
; AVX-LABEL: test2:
; AVX:       # %bb.0:
; AVX-NEXT:    pushl %ebp
; AVX-NEXT:    movl %esp, %ebp
; AVX-NEXT:    pushl %esi
; AVX-NEXT:    andl $-16, %esp
; AVX-NEXT:    subl $32, %esp
; AVX-NEXT:    movl %esp, %esi
; AVX-NEXT:    movl 8(%ebp), %eax
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%esi)
; AVX-NEXT:    addl $3, %eax
; AVX-NEXT:    andl $-4, %eax
; AVX-NEXT:    calll __alloca
; AVX-NEXT:    movl %esp, %eax
; AVX-NEXT:    pushl %eax
; AVX-NEXT:    calll _dummy
; AVX-NEXT:    leal -4(%ebp), %esp
; AVX-NEXT:    popl %esi
; AVX-NEXT:    popl %ebp
; AVX-NEXT:    retl
  %tmp1210 = alloca i8, i32 16, align 4
  call void @llvm.memset.p0.i64(ptr align 4 %tmp1210, i8 0, i64 16, i1 false)
  %x = alloca i8, i32 %t
  call void @dummy(ptr %x)
  ret void
}

declare void @dummy(ptr)

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind