xref: /llvm-project/llvm/test/CodeGen/X86/sse-regcall.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
3; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
4; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s
5
6; Test regcall when receiving/returning i1
7define x86_regcallcc i1 @test_argReti1(i1 %a)  { ; receives one i1 and returns an i1 using the regcall convention
8; WIN32-LABEL: test_argReti1:
9; WIN32:       # %bb.0:
10; WIN32-NEXT:    incb %al
11; WIN32-NEXT:    # kill: def $al killed $al killed $eax
12; WIN32-NEXT:    retl
13;
14; WIN64-LABEL: test_argReti1:
15; WIN64:       # %bb.0:
16; WIN64-NEXT:    incb %al
17; WIN64-NEXT:    # kill: def $al killed $al killed $eax
18; WIN64-NEXT:    retq
19;
20; LINUXOSX-LABEL: test_argReti1:
21; LINUXOSX:       # %bb.0:
22; LINUXOSX-NEXT:    incb %al
23; LINUXOSX-NEXT:    # kill: def $al killed $al killed $eax
24; LINUXOSX-NEXT:    retq
25  %add = add i1 %a, 1 ; i1 addition wraps modulo 2, so adding 1 toggles the bit
26  ret i1 %add
27}
28
29; Test regcall when passing/retrieving i1
30define x86_regcallcc i1 @test_CallargReti1(i1 %a)  { ; caller side: passes an i1 to, and gets an i1 back from, a regcall callee
31; WIN32-LABEL: test_CallargReti1:
32; WIN32:       # %bb.0:
33; WIN32-NEXT:    incb %al
34; WIN32-NEXT:    movzbl %al, %eax
35; WIN32-NEXT:    calll _test_argReti1
36; WIN32-NEXT:    incb %al
37; WIN32-NEXT:    retl
38;
39; WIN64-LABEL: test_CallargReti1:
40; WIN64:       # %bb.0:
41; WIN64-NEXT:    pushq %rax
42; WIN64-NEXT:    .seh_stackalloc 8
43; WIN64-NEXT:    .seh_endprologue
44; WIN64-NEXT:    incb %al
45; WIN64-NEXT:    movzbl %al, %eax
46; WIN64-NEXT:    callq test_argReti1
47; WIN64-NEXT:    incb %al
48; WIN64-NEXT:    popq %rcx
49; WIN64-NEXT:    retq
50; WIN64-NEXT:    .seh_endproc
51;
52; LINUXOSX-LABEL: test_CallargReti1:
53; LINUXOSX:       # %bb.0:
54; LINUXOSX-NEXT:    pushq %rax
55; LINUXOSX-NEXT:    .cfi_def_cfa_offset 16
56; LINUXOSX-NEXT:    incb %al
57; LINUXOSX-NEXT:    movzbl %al, %eax
58; LINUXOSX-NEXT:    callq *test_argReti1@GOTPCREL(%rip)
59; LINUXOSX-NEXT:    incb %al
60; LINUXOSX-NEXT:    popq %rcx
61; LINUXOSX-NEXT:    .cfi_def_cfa_offset 8
62; LINUXOSX-NEXT:    retq
63  %b = add i1 %a, 1 ; toggle the incoming i1 before handing it to the callee
64  %c = call x86_regcallcc i1 @test_argReti1(i1 %b) ; i1 argument and i1 result both cross the regcall boundary
65  %d = add i1 %c, 1 ; toggle the i1 that came back from the call
66  ret i1 %d
67}
68
69; Test calling conventions - input parameters, callee-saved XMM registers
70define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind { ; three <16 x float> inputs; returns (a + b) - (a * b) + c
71; WIN32-LABEL: testf32_inp:
72; WIN32:       # %bb.0:
73; WIN32-NEXT:    pushl %ebp
74; WIN32-NEXT:    movl %esp, %ebp
75; WIN32-NEXT:    andl $-16, %esp
76; WIN32-NEXT:    subl $32, %esp
77; WIN32-NEXT:    movaps %xmm7, (%esp) # 16-byte Spill
78; WIN32-NEXT:    movaps %xmm6, %xmm7
79; WIN32-NEXT:    movaps %xmm5, %xmm6
80; WIN32-NEXT:    movaps %xmm4, %xmm5
81; WIN32-NEXT:    movaps %xmm1, %xmm4
82; WIN32-NEXT:    movaps %xmm0, %xmm1
83; WIN32-NEXT:    addps %xmm5, %xmm0
84; WIN32-NEXT:    mulps %xmm5, %xmm1
85; WIN32-NEXT:    subps %xmm1, %xmm0
86; WIN32-NEXT:    movups 8(%ebp), %xmm1
87; WIN32-NEXT:    addps %xmm1, %xmm0
88; WIN32-NEXT:    movaps %xmm4, %xmm1
89; WIN32-NEXT:    addps %xmm6, %xmm1
90; WIN32-NEXT:    mulps %xmm6, %xmm4
91; WIN32-NEXT:    subps %xmm4, %xmm1
92; WIN32-NEXT:    movups 24(%ebp), %xmm4
93; WIN32-NEXT:    addps %xmm4, %xmm1
94; WIN32-NEXT:    movaps %xmm2, %xmm4
95; WIN32-NEXT:    addps %xmm7, %xmm4
96; WIN32-NEXT:    mulps %xmm7, %xmm2
97; WIN32-NEXT:    subps %xmm2, %xmm4
98; WIN32-NEXT:    movups 40(%ebp), %xmm2
99; WIN32-NEXT:    addps %xmm2, %xmm4
100; WIN32-NEXT:    movaps %xmm3, %xmm5
101; WIN32-NEXT:    movaps (%esp), %xmm2 # 16-byte Reload
102; WIN32-NEXT:    addps %xmm2, %xmm5
103; WIN32-NEXT:    mulps %xmm2, %xmm3
104; WIN32-NEXT:    subps %xmm3, %xmm5
105; WIN32-NEXT:    movups 56(%ebp), %xmm2
106; WIN32-NEXT:    addps %xmm2, %xmm5
107; WIN32-NEXT:    movaps %xmm4, %xmm2
108; WIN32-NEXT:    movaps %xmm5, %xmm3
109; WIN32-NEXT:    movl %ebp, %esp
110; WIN32-NEXT:    popl %ebp
111; WIN32-NEXT:    retl
112;
113; WIN64-LABEL: testf32_inp:
114; WIN64:       # %bb.0:
115; WIN64-NEXT:    subq $72, %rsp
116; WIN64-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
117; WIN64-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
118; WIN64-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
119; WIN64-NEXT:    movaps %xmm12, (%rsp) # 16-byte Spill
120; WIN64-NEXT:    movaps %xmm0, %xmm12
121; WIN64-NEXT:    addps %xmm4, %xmm12
122; WIN64-NEXT:    movaps %xmm1, %xmm13
123; WIN64-NEXT:    addps %xmm5, %xmm13
124; WIN64-NEXT:    movaps %xmm2, %xmm14
125; WIN64-NEXT:    addps %xmm6, %xmm14
126; WIN64-NEXT:    movaps %xmm3, %xmm15
127; WIN64-NEXT:    addps %xmm7, %xmm15
128; WIN64-NEXT:    mulps %xmm4, %xmm0
129; WIN64-NEXT:    subps %xmm0, %xmm12
130; WIN64-NEXT:    mulps %xmm5, %xmm1
131; WIN64-NEXT:    subps %xmm1, %xmm13
132; WIN64-NEXT:    mulps %xmm6, %xmm2
133; WIN64-NEXT:    subps %xmm2, %xmm14
134; WIN64-NEXT:    mulps %xmm7, %xmm3
135; WIN64-NEXT:    subps %xmm3, %xmm15
136; WIN64-NEXT:    addps %xmm8, %xmm12
137; WIN64-NEXT:    addps %xmm9, %xmm13
138; WIN64-NEXT:    addps %xmm10, %xmm14
139; WIN64-NEXT:    addps %xmm11, %xmm15
140; WIN64-NEXT:    movaps %xmm12, %xmm0
141; WIN64-NEXT:    movaps %xmm13, %xmm1
142; WIN64-NEXT:    movaps %xmm14, %xmm2
143; WIN64-NEXT:    movaps %xmm15, %xmm3
144; WIN64-NEXT:    movaps (%rsp), %xmm12 # 16-byte Reload
145; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
146; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
147; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
148; WIN64-NEXT:    addq $72, %rsp
149; WIN64-NEXT:    retq
150;
151; LINUXOSX-LABEL: testf32_inp:
152; LINUXOSX:       # %bb.0:
153; LINUXOSX-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
154; LINUXOSX-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
155; LINUXOSX-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
156; LINUXOSX-NEXT:    movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
157; LINUXOSX-NEXT:    movaps %xmm0, %xmm12
158; LINUXOSX-NEXT:    addps %xmm4, %xmm12
159; LINUXOSX-NEXT:    movaps %xmm1, %xmm13
160; LINUXOSX-NEXT:    addps %xmm5, %xmm13
161; LINUXOSX-NEXT:    movaps %xmm2, %xmm14
162; LINUXOSX-NEXT:    addps %xmm6, %xmm14
163; LINUXOSX-NEXT:    movaps %xmm3, %xmm15
164; LINUXOSX-NEXT:    addps %xmm7, %xmm15
165; LINUXOSX-NEXT:    mulps %xmm4, %xmm0
166; LINUXOSX-NEXT:    subps %xmm0, %xmm12
167; LINUXOSX-NEXT:    mulps %xmm5, %xmm1
168; LINUXOSX-NEXT:    subps %xmm1, %xmm13
169; LINUXOSX-NEXT:    mulps %xmm6, %xmm2
170; LINUXOSX-NEXT:    subps %xmm2, %xmm14
171; LINUXOSX-NEXT:    mulps %xmm7, %xmm3
172; LINUXOSX-NEXT:    subps %xmm3, %xmm15
173; LINUXOSX-NEXT:    addps %xmm8, %xmm12
174; LINUXOSX-NEXT:    addps %xmm9, %xmm13
175; LINUXOSX-NEXT:    addps %xmm10, %xmm14
176; LINUXOSX-NEXT:    addps %xmm11, %xmm15
177; LINUXOSX-NEXT:    movaps %xmm12, %xmm0
178; LINUXOSX-NEXT:    movaps %xmm13, %xmm1
179; LINUXOSX-NEXT:    movaps %xmm14, %xmm2
180; LINUXOSX-NEXT:    movaps %xmm15, %xmm3
181; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
182; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
183; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
184; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
185; LINUXOSX-NEXT:    retq
186  %x1 = fadd <16 x float> %a, %b ; a + b
187  %x2 = fmul <16 x float> %a, %b ; a * b (keeps both %a and %b live alongside %x1)
188  %x3 = fsub <16 x float> %x1, %x2 ; (a + b) - (a * b)
189  %x4 = fadd <16 x float> %x3, %c ; fold in the third vector argument
190  ret <16 x float> %x4
191}
192
193; Test calling conventions - input parameters, callee-saved GPRs
194define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, ; first six of twelve i32 arguments; the signature continues after the assertion block
195; WIN32-LABEL: testi32_inp:
196; WIN32:       # %bb.0:
197; WIN32-NEXT:    pushl %ebp
198; WIN32-NEXT:    pushl %ebx
199; WIN32-NEXT:    subl $12, %esp
200; WIN32-NEXT:    movl %esi, (%esp) # 4-byte Spill
201; WIN32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
202; WIN32-NEXT:    movl %eax, %ebp
203; WIN32-NEXT:    leal (%edx,%edi), %eax
204; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
205; WIN32-NEXT:    movl %edx, %eax
206; WIN32-NEXT:    subl %edi, %eax
207; WIN32-NEXT:    movl %ebp, %edx
208; WIN32-NEXT:    subl %ecx, %edx
209; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
210; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %ebx
211; WIN32-NEXT:    imull %edx, %ebx
212; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
213; WIN32-NEXT:    movl %esi, %edx
214; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %edx
215; WIN32-NEXT:    imull %eax, %edx
216; WIN32-NEXT:    addl %ebx, %edx
217; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
218; WIN32-NEXT:    movl (%esp), %edi # 4-byte Reload
219; WIN32-NEXT:    subl %ebx, %edi
220; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
221; WIN32-NEXT:    movl %ecx, %eax
222; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %eax
223; WIN32-NEXT:    imull %edi, %eax
224; WIN32-NEXT:    addl %edx, %eax
225; WIN32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
226; WIN32-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
227; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
228; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %edx
229; WIN32-NEXT:    imull %edx, %ebp
230; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %esi
231; WIN32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
232; WIN32-NEXT:    addl %esi, %ebp
233; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
234; WIN32-NEXT:    imull %ebx, %ecx
235; WIN32-NEXT:    addl %ecx, %ebp
236; WIN32-NEXT:    addl %eax, %ebp
237; WIN32-NEXT:    movl %ebp, %eax
238; WIN32-NEXT:    addl $12, %esp
239; WIN32-NEXT:    popl %ebx
240; WIN32-NEXT:    popl %ebp
241; WIN32-NEXT:    retl
242;
243; WIN64-LABEL: testi32_inp:
244; WIN64:       # %bb.0:
245; WIN64-NEXT:    pushq %rbp
246; WIN64-NEXT:    pushq %rbx
247; WIN64-NEXT:    # kill: def $edx killed $edx def $rdx
248; WIN64-NEXT:    # kill: def $esi killed $esi def $rsi
249; WIN64-NEXT:    # kill: def $r15d killed $r15d def $r15
250; WIN64-NEXT:    # kill: def $r14d killed $r14d def $r14
251; WIN64-NEXT:    # kill: def $r12d killed $r12d def $r12
252; WIN64-NEXT:    # kill: def $r11d killed $r11d def $r11
253; WIN64-NEXT:    # kill: def $r10d killed $r10d def $r10
254; WIN64-NEXT:    # kill: def $r9d killed $r9d def $r9
255; WIN64-NEXT:    # kill: def $r8d killed $r8d def $r8
256; WIN64-NEXT:    # kill: def $edi killed $edi def $rdi
257; WIN64-NEXT:    leal (%rdx,%rdi), %ebx
258; WIN64-NEXT:    movl %edx, %ebp
259; WIN64-NEXT:    subl %edi, %ebp
260; WIN64-NEXT:    leal (%rsi,%r8), %edx
261; WIN64-NEXT:    # kill: def $esi killed $esi killed $rsi
262; WIN64-NEXT:    subl %r8d, %esi
263; WIN64-NEXT:    leal (%r9,%r10), %edi
264; WIN64-NEXT:    movl %r9d, %r8d
265; WIN64-NEXT:    subl %r10d, %r8d
266; WIN64-NEXT:    movl %eax, %r9d
267; WIN64-NEXT:    subl %ecx, %r9d
268; WIN64-NEXT:    imull %r9d, %r8d
269; WIN64-NEXT:    leal (%r11,%r12), %r9d
270; WIN64-NEXT:    movl %r11d, %r10d
271; WIN64-NEXT:    subl %r12d, %r10d
272; WIN64-NEXT:    imull %ebp, %r10d
273; WIN64-NEXT:    addl %r8d, %r10d
274; WIN64-NEXT:    leal (%r14,%r15), %r8d
275; WIN64-NEXT:    movl %r14d, %r11d
276; WIN64-NEXT:    subl %r15d, %r11d
277; WIN64-NEXT:    imull %esi, %r11d
278; WIN64-NEXT:    addl %r10d, %r11d
279; WIN64-NEXT:    addl %ecx, %eax
280; WIN64-NEXT:    imull %edi, %eax
281; WIN64-NEXT:    imull %ebx, %r9d
282; WIN64-NEXT:    addl %r9d, %eax
283; WIN64-NEXT:    imull %edx, %r8d
284; WIN64-NEXT:    addl %r8d, %eax
285; WIN64-NEXT:    addl %r11d, %eax
286; WIN64-NEXT:    popq %rbx
287; WIN64-NEXT:    popq %rbp
288; WIN64-NEXT:    retq
289;
290; LINUXOSX-LABEL: testi32_inp:
291; LINUXOSX:       # %bb.0:
292; LINUXOSX-NEXT:    # kill: def $edx killed $edx def $rdx
293; LINUXOSX-NEXT:    # kill: def $esi killed $esi def $rsi
294; LINUXOSX-NEXT:    # kill: def $r14d killed $r14d def $r14
295; LINUXOSX-NEXT:    # kill: def $r13d killed $r13d def $r13
296; LINUXOSX-NEXT:    # kill: def $r12d killed $r12d def $r12
297; LINUXOSX-NEXT:    # kill: def $r9d killed $r9d def $r9
298; LINUXOSX-NEXT:    # kill: def $r8d killed $r8d def $r8
299; LINUXOSX-NEXT:    # kill: def $edi killed $edi def $rdi
300; LINUXOSX-NEXT:    leal (%rdx,%rdi), %r10d
301; LINUXOSX-NEXT:    movl %edx, %r11d
302; LINUXOSX-NEXT:    subl %edi, %r11d
303; LINUXOSX-NEXT:    leal (%rsi,%r8), %edx
304; LINUXOSX-NEXT:    # kill: def $esi killed $esi killed $rsi
305; LINUXOSX-NEXT:    subl %r8d, %esi
306; LINUXOSX-NEXT:    leal (%r9,%r12), %edi
307; LINUXOSX-NEXT:    movl %r9d, %r8d
308; LINUXOSX-NEXT:    subl %r12d, %r8d
309; LINUXOSX-NEXT:    movl %eax, %r9d
310; LINUXOSX-NEXT:    subl %ecx, %r9d
311; LINUXOSX-NEXT:    imull %r9d, %r8d
312; LINUXOSX-NEXT:    leal (%r13,%r14), %r9d
313; LINUXOSX-NEXT:    movl %r13d, %r12d
314; LINUXOSX-NEXT:    subl %r14d, %r12d
315; LINUXOSX-NEXT:    imull %r11d, %r12d
316; LINUXOSX-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
317; LINUXOSX-NEXT:    addl %r8d, %r12d
318; LINUXOSX-NEXT:    movl %r15d, %r8d
319; LINUXOSX-NEXT:    subl %r11d, %r8d
320; LINUXOSX-NEXT:    imull %esi, %r8d
321; LINUXOSX-NEXT:    addl %r12d, %r8d
322; LINUXOSX-NEXT:    addl %ecx, %eax
323; LINUXOSX-NEXT:    imull %edi, %eax
324; LINUXOSX-NEXT:    imull %r10d, %r9d
325; LINUXOSX-NEXT:    addl %r9d, %eax
326; LINUXOSX-NEXT:    addl %r15d, %r11d
327; LINUXOSX-NEXT:    imull %edx, %r11d
328; LINUXOSX-NEXT:    addl %r11d, %eax
329; LINUXOSX-NEXT:    addl %r8d, %eax
330; LINUXOSX-NEXT:    retq
331                                      i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind { ; remaining six i32 arguments
332  %x1 = sub i32 %a1, %a2 ; x1-x3: differences of consecutive a-argument pairs
333  %x2 = sub i32 %a3, %a4
334  %x3 = sub i32 %a5, %a6
335  %y1 = sub i32 %b1, %b2 ; y1-y3: differences of consecutive b-argument pairs
336  %y2 = sub i32 %b3, %b4
337  %y3 = sub i32 %b5, %b6
338  %v1 = add i32 %a1, %a2 ; v1-v3: sums of the same a-argument pairs
339  %v2 = add i32 %a3, %a4
340  %v3 = add i32 %a5, %a6
341  %w1 = add i32 %b1, %b2 ; w1-w3: sums of the same b-argument pairs
342  %w2 = add i32 %b3, %b4
343  %w3 = add i32 %b5, %b6
344  %s1 = mul i32 %x1, %y1 ; s1-s3: products of matching a/b differences
345  %s2 = mul i32 %x2, %y2
346  %s3 = mul i32 %x3, %y3
347  %t1 = mul i32 %v1, %w1 ; t1-t3: products of matching a/b sums
348  %t2 = mul i32 %v2, %w2
349  %t3 = mul i32 %v3, %w3
350  %m1 = add i32 %s1, %s2 ; m2 = s1 + s2 + s3
351  %m2 = add i32 %m1, %s3
352  %n1 = add i32 %t1, %t2 ; n2 = t1 + t2 + t3
353  %n2 = add i32 %n1, %t3
354  %r1 = add i32 %m2, %n2 ; result depends on all twelve arguments
355  ret i32 %r1
356}
357
358; Test that parameters that overflow the available argument registers are passed on the stack
359define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind { ; three <32 x float> inputs; returns a + b + c
360; WIN32-LABEL: testf32_stack:
361; WIN32:       # %bb.0:
362; WIN32-NEXT:    pushl %ebp
363; WIN32-NEXT:    movl %esp, %ebp
364; WIN32-NEXT:    andl $-16, %esp
365; WIN32-NEXT:    subl $48, %esp
366; WIN32-NEXT:    movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
367; WIN32-NEXT:    movaps %xmm6, (%esp) # 16-byte Spill
368; WIN32-NEXT:    movaps %xmm5, %xmm6
369; WIN32-NEXT:    movaps %xmm4, %xmm5
370; WIN32-NEXT:    movaps %xmm3, %xmm4
371; WIN32-NEXT:    movaps %xmm2, %xmm3
372; WIN32-NEXT:    movaps %xmm1, %xmm2
373; WIN32-NEXT:    movaps %xmm0, %xmm1
374; WIN32-NEXT:    movups 120(%ebp), %xmm7
375; WIN32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
376; WIN32-NEXT:    addps %xmm7, %xmm0
377; WIN32-NEXT:    movups 248(%ebp), %xmm7
378; WIN32-NEXT:    addps %xmm7, %xmm0
379; WIN32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
380; WIN32-NEXT:    movups 104(%ebp), %xmm7
381; WIN32-NEXT:    movaps (%esp), %xmm0 # 16-byte Reload
382; WIN32-NEXT:    addps %xmm7, %xmm0
383; WIN32-NEXT:    movups 232(%ebp), %xmm7
384; WIN32-NEXT:    addps %xmm7, %xmm0
385; WIN32-NEXT:    movaps %xmm0, (%esp) # 16-byte Spill
386; WIN32-NEXT:    movups 88(%ebp), %xmm7
387; WIN32-NEXT:    addps %xmm7, %xmm6
388; WIN32-NEXT:    movups 216(%ebp), %xmm7
389; WIN32-NEXT:    addps %xmm7, %xmm6
390; WIN32-NEXT:    movups 72(%ebp), %xmm7
391; WIN32-NEXT:    addps %xmm7, %xmm5
392; WIN32-NEXT:    movups 200(%ebp), %xmm7
393; WIN32-NEXT:    addps %xmm7, %xmm5
394; WIN32-NEXT:    movups 56(%ebp), %xmm7
395; WIN32-NEXT:    addps %xmm7, %xmm4
396; WIN32-NEXT:    movups 184(%ebp), %xmm7
397; WIN32-NEXT:    addps %xmm7, %xmm4
398; WIN32-NEXT:    movups 40(%ebp), %xmm7
399; WIN32-NEXT:    addps %xmm7, %xmm3
400; WIN32-NEXT:    movups 168(%ebp), %xmm7
401; WIN32-NEXT:    addps %xmm7, %xmm3
402; WIN32-NEXT:    movups 24(%ebp), %xmm7
403; WIN32-NEXT:    addps %xmm7, %xmm2
404; WIN32-NEXT:    movups 152(%ebp), %xmm7
405; WIN32-NEXT:    addps %xmm7, %xmm2
406; WIN32-NEXT:    movups 8(%ebp), %xmm7
407; WIN32-NEXT:    addps %xmm7, %xmm1
408; WIN32-NEXT:    movups 136(%ebp), %xmm7
409; WIN32-NEXT:    addps %xmm7, %xmm1
410; WIN32-NEXT:    movaps %xmm1, %xmm0
411; WIN32-NEXT:    movaps %xmm2, %xmm1
412; WIN32-NEXT:    movaps %xmm3, %xmm2
413; WIN32-NEXT:    movaps %xmm4, %xmm3
414; WIN32-NEXT:    movaps %xmm5, %xmm4
415; WIN32-NEXT:    movaps %xmm6, %xmm5
416; WIN32-NEXT:    movaps (%esp), %xmm6 # 16-byte Reload
417; WIN32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
418; WIN32-NEXT:    movl %ebp, %esp
419; WIN32-NEXT:    popl %ebp
420; WIN32-NEXT:    retl
421;
422; WIN64-LABEL: testf32_stack:
423; WIN64:       # %bb.0:
424; WIN64-NEXT:    pushq %rax
425; WIN64-NEXT:    addps %xmm15, %xmm7
426; WIN64-NEXT:    addps %xmm14, %xmm6
427; WIN64-NEXT:    addps %xmm13, %xmm5
428; WIN64-NEXT:    addps %xmm12, %xmm4
429; WIN64-NEXT:    addps %xmm11, %xmm3
430; WIN64-NEXT:    addps %xmm10, %xmm2
431; WIN64-NEXT:    addps %xmm9, %xmm1
432; WIN64-NEXT:    addps %xmm8, %xmm0
433; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
434; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
435; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
436; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
437; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm4
438; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm5
439; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm6
440; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm7
441; WIN64-NEXT:    popq %rax
442; WIN64-NEXT:    retq
443;
444; LINUXOSX-LABEL: testf32_stack:
445; LINUXOSX:       # %bb.0:
446; LINUXOSX-NEXT:    addps %xmm15, %xmm7
447; LINUXOSX-NEXT:    addps %xmm14, %xmm6
448; LINUXOSX-NEXT:    addps %xmm13, %xmm5
449; LINUXOSX-NEXT:    addps %xmm12, %xmm4
450; LINUXOSX-NEXT:    addps %xmm11, %xmm3
451; LINUXOSX-NEXT:    addps %xmm10, %xmm2
452; LINUXOSX-NEXT:    addps %xmm9, %xmm1
453; LINUXOSX-NEXT:    addps %xmm8, %xmm0
454; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
455; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
456; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
457; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
458; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm4
459; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm5
460; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm6
461; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm7
462; LINUXOSX-NEXT:    retq
463  %x1 = fadd <32 x float> %a, %b ; a + b
464  %x2 = fadd <32 x float> %x1, %c ; (a + b) + c, so every incoming vector part is read
465  ret <32 x float> %x2
466}
467