; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s

; Test regcall when receiving/returning i1.
; The function adds 1 to its i1 argument and returns the result; every
; target is expected to do this with a single incb on %al.
define x86_regcallcc i1 @test_argReti1(i1 %a)  {
; WIN32-LABEL: test_argReti1:
; WIN32:       # %bb.0:
; WIN32-NEXT:    incb %al
; WIN32-NEXT:    # kill: def $al killed $al killed $eax
; WIN32-NEXT:    retl
;
; WIN64-LABEL: test_argReti1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    incb %al
; WIN64-NEXT:    # kill: def $al killed $al killed $eax
; WIN64-NEXT:    retq
;
; LINUXOSX-LABEL: test_argReti1:
; LINUXOSX:       # %bb.0:
; LINUXOSX-NEXT:    incb %al
; LINUXOSX-NEXT:    # kill: def $al killed $al killed $eax
; LINUXOSX-NEXT:    retq
  %add = add i1 %a, 1
  ret i1 %add
}

; Test regcall when passing/retrieving i1.
; The function increments its argument, passes it to test_argReti1 through
; a regcall call, and increments the returned value again. The expected code
; zero-extends %al into %eax (movzbl) before the call on every target.
define x86_regcallcc i1 @test_CallargReti1(i1 %a)  {
; WIN32-LABEL: test_CallargReti1:
; WIN32:       # %bb.0:
; WIN32-NEXT:    incb %al
; WIN32-NEXT:    movzbl %al, %eax
; WIN32-NEXT:    calll _test_argReti1
; WIN32-NEXT:    incb %al
; WIN32-NEXT:    retl
;
; WIN64-LABEL: test_CallargReti1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %rax
; WIN64-NEXT:    .seh_stackalloc 8
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    incb %al
; WIN64-NEXT:    movzbl %al, %eax
; WIN64-NEXT:    callq test_argReti1
; WIN64-NEXT:    incb %al
; WIN64-NEXT:    popq %rcx
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX-LABEL: test_CallargReti1:
; LINUXOSX:       # %bb.0:
; LINUXOSX-NEXT:    pushq %rax
; LINUXOSX-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX-NEXT:    incb %al
; LINUXOSX-NEXT:    movzbl %al, %eax
; LINUXOSX-NEXT:    callq *test_argReti1@GOTPCREL(%rip)
; LINUXOSX-NEXT:    incb %al
; LINUXOSX-NEXT:    popq %rcx
; LINUXOSX-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX-NEXT:    retq
  %b = add i1 %a, 1
  %c = call x86_regcallcc i1 @test_argReti1(i1 %b)
  %d = add i1 %c, 1
  ret i1 %d
}

; Test calling conventions - input parameters, callee saved xmms.
; Computes (%a + %b) - (%a * %b) + %c element-wise on <16 x float> values.
; The 64-bit expectations spill xmm12-xmm15 on entry and reload them before
; returning; the 32-bit expectation reads part of the input from
; %ebp-relative stack slots with movups.
define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
; WIN32-LABEL: testf32_inp:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    movl %esp, %ebp
; WIN32-NEXT:    andl $-16, %esp
; WIN32-NEXT:    subl $32, %esp
; WIN32-NEXT:    movaps %xmm7, (%esp) # 16-byte Spill
; WIN32-NEXT:    movaps %xmm6, %xmm7
; WIN32-NEXT:    movaps %xmm5, %xmm6
; WIN32-NEXT:    movaps %xmm4, %xmm5
; WIN32-NEXT:    movaps %xmm1, %xmm4
; WIN32-NEXT:    movaps %xmm0, %xmm1
; WIN32-NEXT:    addps %xmm5, %xmm0
; WIN32-NEXT:    mulps %xmm5, %xmm1
; WIN32-NEXT:    subps %xmm1, %xmm0
; WIN32-NEXT:    movups 8(%ebp), %xmm1
; WIN32-NEXT:    addps %xmm1, %xmm0
; WIN32-NEXT:    movaps %xmm4, %xmm1
; WIN32-NEXT:    addps %xmm6, %xmm1
; WIN32-NEXT:    mulps %xmm6, %xmm4
; WIN32-NEXT:    subps %xmm4, %xmm1
; WIN32-NEXT:    movups 24(%ebp), %xmm4
; WIN32-NEXT:    addps %xmm4, %xmm1
; WIN32-NEXT:    movaps %xmm2, %xmm4
; WIN32-NEXT:    addps %xmm7, %xmm4
; WIN32-NEXT:    mulps %xmm7, %xmm2
; WIN32-NEXT:    subps %xmm2, %xmm4
; WIN32-NEXT:    movups 40(%ebp), %xmm2
; WIN32-NEXT:    addps %xmm2, %xmm4
; WIN32-NEXT:    movaps %xmm3, %xmm5
; WIN32-NEXT:    movaps (%esp), %xmm2 # 16-byte Reload
; WIN32-NEXT:    addps %xmm2, %xmm5
; WIN32-NEXT:    mulps %xmm2, %xmm3
; WIN32-NEXT:    subps %xmm3, %xmm5
; WIN32-NEXT:    movups 56(%ebp), %xmm2
; WIN32-NEXT:    addps %xmm2, %xmm5
; WIN32-NEXT:    movaps %xmm4, %xmm2
; WIN32-NEXT:    movaps %xmm5, %xmm3
; WIN32-NEXT:    movl %ebp, %esp
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
;
; WIN64-LABEL: testf32_inp:
; WIN64:       # %bb.0:
; WIN64-NEXT:    subq $72, %rsp
; WIN64-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    movaps %xmm12, (%rsp) # 16-byte Spill
; WIN64-NEXT:    movaps %xmm0, %xmm12
; WIN64-NEXT:    addps %xmm4, %xmm12
; WIN64-NEXT:    movaps %xmm1, %xmm13
; WIN64-NEXT:    addps %xmm5, %xmm13
; WIN64-NEXT:    movaps %xmm2, %xmm14
; WIN64-NEXT:    addps %xmm6, %xmm14
; WIN64-NEXT:    movaps %xmm3, %xmm15
; WIN64-NEXT:    addps %xmm7, %xmm15
; WIN64-NEXT:    mulps %xmm4, %xmm0
; WIN64-NEXT:    subps %xmm0, %xmm12
; WIN64-NEXT:    mulps %xmm5, %xmm1
; WIN64-NEXT:    subps %xmm1, %xmm13
; WIN64-NEXT:    mulps %xmm6, %xmm2
; WIN64-NEXT:    subps %xmm2, %xmm14
; WIN64-NEXT:    mulps %xmm7, %xmm3
; WIN64-NEXT:    subps %xmm3, %xmm15
; WIN64-NEXT:    addps %xmm8, %xmm12
; WIN64-NEXT:    addps %xmm9, %xmm13
; WIN64-NEXT:    addps %xmm10, %xmm14
; WIN64-NEXT:    addps %xmm11, %xmm15
; WIN64-NEXT:    movaps %xmm12, %xmm0
; WIN64-NEXT:    movaps %xmm13, %xmm1
; WIN64-NEXT:    movaps %xmm14, %xmm2
; WIN64-NEXT:    movaps %xmm15, %xmm3
; WIN64-NEXT:    movaps (%rsp), %xmm12 # 16-byte Reload
; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; WIN64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; WIN64-NEXT:    addq $72, %rsp
; WIN64-NEXT:    retq
;
; LINUXOSX-LABEL: testf32_inp:
; LINUXOSX:       # %bb.0:
; LINUXOSX-NEXT:    movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX-NEXT:    movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX-NEXT:    movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX-NEXT:    movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX-NEXT:    movaps %xmm0, %xmm12
; LINUXOSX-NEXT:    addps %xmm4, %xmm12
; LINUXOSX-NEXT:    movaps %xmm1, %xmm13
; LINUXOSX-NEXT:    addps %xmm5, %xmm13
; LINUXOSX-NEXT:    movaps %xmm2, %xmm14
; LINUXOSX-NEXT:    addps %xmm6, %xmm14
; LINUXOSX-NEXT:    movaps %xmm3, %xmm15
; LINUXOSX-NEXT:    addps %xmm7, %xmm15
; LINUXOSX-NEXT:    mulps %xmm4, %xmm0
; LINUXOSX-NEXT:    subps %xmm0, %xmm12
; LINUXOSX-NEXT:    mulps %xmm5, %xmm1
; LINUXOSX-NEXT:    subps %xmm1, %xmm13
; LINUXOSX-NEXT:    mulps %xmm6, %xmm2
; LINUXOSX-NEXT:    subps %xmm2, %xmm14
; LINUXOSX-NEXT:    mulps %xmm7, %xmm3
; LINUXOSX-NEXT:    subps %xmm3, %xmm15
; LINUXOSX-NEXT:    addps %xmm8, %xmm12
; LINUXOSX-NEXT:    addps %xmm9, %xmm13
; LINUXOSX-NEXT:    addps %xmm10, %xmm14
; LINUXOSX-NEXT:    addps %xmm11, %xmm15
; LINUXOSX-NEXT:    movaps %xmm12, %xmm0
; LINUXOSX-NEXT:    movaps %xmm13, %xmm1
; LINUXOSX-NEXT:    movaps %xmm14, %xmm2
; LINUXOSX-NEXT:    movaps %xmm15, %xmm3
; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX-NEXT:    retq
  %x1 = fadd <16 x float> %a, %b
  %x2 = fmul <16 x float> %a, %b
  %x3 = fsub <16 x float> %x1, %x2
  %x4 = fadd <16 x float> %x3, %c
  ret <16 x float> %x4
}

; Test calling conventions - input parameters, callee saved GPRs.
; Takes twelve i32 arguments, paired as (a1,a2), (a3,a4), (a5,a6) and
; (b1,b2), (b3,b4), (b5,b6). For each pair index it multiplies the a-pair
; difference by the b-pair difference and the a-pair sum by the b-pair sum,
; then returns the total of all six products.
; The assertions sit between the two halves of the parameter list because the
; define spans two lines and the update script emits them after the first.
define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6,
; WIN32-LABEL: testi32_inp:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    pushl %ebx
; WIN32-NEXT:    subl $12, %esp
; WIN32-NEXT:    movl %esi, (%esp) # 4-byte Spill
; WIN32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT:    movl %eax, %ebp
; WIN32-NEXT:    leal (%edx,%edi), %eax
; WIN32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; WIN32-NEXT:    movl %edx, %eax
; WIN32-NEXT:    subl %edi, %eax
; WIN32-NEXT:    movl %ebp, %edx
; WIN32-NEXT:    subl %ecx, %edx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT:    imull %edx, %ebx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    movl %esi, %edx
; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    imull %eax, %edx
; WIN32-NEXT:    addl %ebx, %edx
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; WIN32-NEXT:    movl (%esp), %edi # 4-byte Reload
; WIN32-NEXT:    subl %ebx, %edi
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    movl %ecx, %eax
; WIN32-NEXT:    subl {{[0-9]+}}(%esp), %eax
; WIN32-NEXT:    imull %edi, %eax
; WIN32-NEXT:    addl %edx, %eax
; WIN32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
; WIN32-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload
; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %edx
; WIN32-NEXT:    imull %edx, %ebp
; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %esi
; WIN32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
; WIN32-NEXT:    addl %esi, %ebp
; WIN32-NEXT:    addl {{[0-9]+}}(%esp), %ecx
; WIN32-NEXT:    imull %ebx, %ecx
; WIN32-NEXT:    addl %ecx, %ebp
; WIN32-NEXT:    addl %eax, %ebp
; WIN32-NEXT:    movl %ebp, %eax
; WIN32-NEXT:    addl $12, %esp
; WIN32-NEXT:    popl %ebx
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
;
; WIN64-LABEL: testi32_inp:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %rbp
; WIN64-NEXT:    pushq %rbx
; WIN64-NEXT:    # kill: def $edx killed $edx def $rdx
; WIN64-NEXT:    # kill: def $esi killed $esi def $rsi
; WIN64-NEXT:    # kill: def $r15d killed $r15d def $r15
; WIN64-NEXT:    # kill: def $r14d killed $r14d def $r14
; WIN64-NEXT:    # kill: def $r12d killed $r12d def $r12
; WIN64-NEXT:    # kill: def $r11d killed $r11d def $r11
; WIN64-NEXT:    # kill: def $r10d killed $r10d def $r10
; WIN64-NEXT:    # kill: def $r9d killed $r9d def $r9
; WIN64-NEXT:    # kill: def $r8d killed $r8d def $r8
; WIN64-NEXT:    # kill: def $edi killed $edi def $rdi
; WIN64-NEXT:    leal (%rdx,%rdi), %ebx
; WIN64-NEXT:    movl %edx, %ebp
; WIN64-NEXT:    subl %edi, %ebp
; WIN64-NEXT:    leal (%rsi,%r8), %edx
; WIN64-NEXT:    # kill: def $esi killed $esi killed $rsi
; WIN64-NEXT:    subl %r8d, %esi
; WIN64-NEXT:    leal (%r9,%r10), %edi
; WIN64-NEXT:    movl %r9d, %r8d
; WIN64-NEXT:    subl %r10d, %r8d
; WIN64-NEXT:    movl %eax, %r9d
; WIN64-NEXT:    subl %ecx, %r9d
; WIN64-NEXT:    imull %r9d, %r8d
; WIN64-NEXT:    leal (%r11,%r12), %r9d
; WIN64-NEXT:    movl %r11d, %r10d
; WIN64-NEXT:    subl %r12d, %r10d
; WIN64-NEXT:    imull %ebp, %r10d
; WIN64-NEXT:    addl %r8d, %r10d
; WIN64-NEXT:    leal (%r14,%r15), %r8d
; WIN64-NEXT:    movl %r14d, %r11d
; WIN64-NEXT:    subl %r15d, %r11d
; WIN64-NEXT:    imull %esi, %r11d
; WIN64-NEXT:    addl %r10d, %r11d
; WIN64-NEXT:    addl %ecx, %eax
; WIN64-NEXT:    imull %edi, %eax
; WIN64-NEXT:    imull %ebx, %r9d
; WIN64-NEXT:    addl %r9d, %eax
; WIN64-NEXT:    imull %edx, %r8d
; WIN64-NEXT:    addl %r8d, %eax
; WIN64-NEXT:    addl %r11d, %eax
; WIN64-NEXT:    popq %rbx
; WIN64-NEXT:    popq %rbp
; WIN64-NEXT:    retq
;
; LINUXOSX-LABEL: testi32_inp:
; LINUXOSX:       # %bb.0:
; LINUXOSX-NEXT:    # kill: def $edx killed $edx def $rdx
; LINUXOSX-NEXT:    # kill: def $esi killed $esi def $rsi
; LINUXOSX-NEXT:    # kill: def $r14d killed $r14d def $r14
; LINUXOSX-NEXT:    # kill: def $r13d killed $r13d def $r13
; LINUXOSX-NEXT:    # kill: def $r12d killed $r12d def $r12
; LINUXOSX-NEXT:    # kill: def $r9d killed $r9d def $r9
; LINUXOSX-NEXT:    # kill: def $r8d killed $r8d def $r8
; LINUXOSX-NEXT:    # kill: def $edi killed $edi def $rdi
; LINUXOSX-NEXT:    leal (%rdx,%rdi), %r10d
; LINUXOSX-NEXT:    movl %edx, %r11d
; LINUXOSX-NEXT:    subl %edi, %r11d
; LINUXOSX-NEXT:    leal (%rsi,%r8), %edx
; LINUXOSX-NEXT:    # kill: def $esi killed $esi killed $rsi
; LINUXOSX-NEXT:    subl %r8d, %esi
; LINUXOSX-NEXT:    leal (%r9,%r12), %edi
; LINUXOSX-NEXT:    movl %r9d, %r8d
; LINUXOSX-NEXT:    subl %r12d, %r8d
; LINUXOSX-NEXT:    movl %eax, %r9d
; LINUXOSX-NEXT:    subl %ecx, %r9d
; LINUXOSX-NEXT:    imull %r9d, %r8d
; LINUXOSX-NEXT:    leal (%r13,%r14), %r9d
; LINUXOSX-NEXT:    movl %r13d, %r12d
; LINUXOSX-NEXT:    subl %r14d, %r12d
; LINUXOSX-NEXT:    imull %r11d, %r12d
; LINUXOSX-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
; LINUXOSX-NEXT:    addl %r8d, %r12d
; LINUXOSX-NEXT:    movl %r15d, %r8d
; LINUXOSX-NEXT:    subl %r11d, %r8d
; LINUXOSX-NEXT:    imull %esi, %r8d
; LINUXOSX-NEXT:    addl %r12d, %r8d
; LINUXOSX-NEXT:    addl %ecx, %eax
; LINUXOSX-NEXT:    imull %edi, %eax
; LINUXOSX-NEXT:    imull %r10d, %r9d
; LINUXOSX-NEXT:    addl %r9d, %eax
; LINUXOSX-NEXT:    addl %r15d, %r11d
; LINUXOSX-NEXT:    imull %edx, %r11d
; LINUXOSX-NEXT:    addl %r11d, %eax
; LINUXOSX-NEXT:    addl %r8d, %eax
; LINUXOSX-NEXT:    retq
                                      i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind {
  %x1 = sub i32 %a1, %a2
  %x2 = sub i32 %a3, %a4
  %x3 = sub i32 %a5, %a6
  %y1 = sub i32 %b1, %b2
  %y2 = sub i32 %b3, %b4
  %y3 = sub i32 %b5, %b6
  %v1 = add i32 %a1, %a2
  %v2 = add i32 %a3, %a4
  %v3 = add i32 %a5, %a6
  %w1 = add i32 %b1, %b2
  %w2 = add i32 %b3, %b4
  %w3 = add i32 %b5, %b6
  %s1 = mul i32 %x1, %y1
  %s2 = mul i32 %x2, %y2
  %s3 = mul i32 %x3, %y3
  %t1 = mul i32 %v1, %w1
  %t2 = mul i32 %v2, %w2
  %t3 = mul i32 %v3, %w3
  %m1 = add i32 %s1, %s2
  %m2 = add i32 %m1, %s3
  %n1 = add i32 %t1, %t2
  %n2 = add i32 %n1, %t3
  %r1 = add i32 %m2, %n2
  ret i32 %r1
}

; Test that parameters, overflowing register capacity, are passed through the stack.
; Computes %a + %b + %c element-wise on <32 x float> values. In the 64-bit
; expectations each result register gets one register-to-register addps and
; one addps from an %rsp-relative slot; in the 32-bit expectation each result
; register gets two addends loaded with movups from %ebp-relative slots.
define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind {
; WIN32-LABEL: testf32_stack:
; WIN32:       # %bb.0:
; WIN32-NEXT:    pushl %ebp
; WIN32-NEXT:    movl %esp, %ebp
; WIN32-NEXT:    andl $-16, %esp
; WIN32-NEXT:    subl $48, %esp
; WIN32-NEXT:    movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; WIN32-NEXT:    movaps %xmm6, (%esp) # 16-byte Spill
; WIN32-NEXT:    movaps %xmm5, %xmm6
; WIN32-NEXT:    movaps %xmm4, %xmm5
; WIN32-NEXT:    movaps %xmm3, %xmm4
; WIN32-NEXT:    movaps %xmm2, %xmm3
; WIN32-NEXT:    movaps %xmm1, %xmm2
; WIN32-NEXT:    movaps %xmm0, %xmm1
; WIN32-NEXT:    movups 120(%ebp), %xmm7
; WIN32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; WIN32-NEXT:    addps %xmm7, %xmm0
; WIN32-NEXT:    movups 248(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm0
; WIN32-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; WIN32-NEXT:    movups 104(%ebp), %xmm7
; WIN32-NEXT:    movaps (%esp), %xmm0 # 16-byte Reload
; WIN32-NEXT:    addps %xmm7, %xmm0
; WIN32-NEXT:    movups 232(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm0
; WIN32-NEXT:    movaps %xmm0, (%esp) # 16-byte Spill
; WIN32-NEXT:    movups 88(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm6
; WIN32-NEXT:    movups 216(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm6
; WIN32-NEXT:    movups 72(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm5
; WIN32-NEXT:    movups 200(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm5
; WIN32-NEXT:    movups 56(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm4
; WIN32-NEXT:    movups 184(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm4
; WIN32-NEXT:    movups 40(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm3
; WIN32-NEXT:    movups 168(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm3
; WIN32-NEXT:    movups 24(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm2
; WIN32-NEXT:    movups 152(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm2
; WIN32-NEXT:    movups 8(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm1
; WIN32-NEXT:    movups 136(%ebp), %xmm7
; WIN32-NEXT:    addps %xmm7, %xmm1
; WIN32-NEXT:    movaps %xmm1, %xmm0
; WIN32-NEXT:    movaps %xmm2, %xmm1
; WIN32-NEXT:    movaps %xmm3, %xmm2
; WIN32-NEXT:    movaps %xmm4, %xmm3
; WIN32-NEXT:    movaps %xmm5, %xmm4
; WIN32-NEXT:    movaps %xmm6, %xmm5
; WIN32-NEXT:    movaps (%esp), %xmm6 # 16-byte Reload
; WIN32-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN32-NEXT:    movl %ebp, %esp
; WIN32-NEXT:    popl %ebp
; WIN32-NEXT:    retl
;
; WIN64-LABEL: testf32_stack:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %rax
; WIN64-NEXT:    addps %xmm15, %xmm7
; WIN64-NEXT:    addps %xmm14, %xmm6
; WIN64-NEXT:    addps %xmm13, %xmm5
; WIN64-NEXT:    addps %xmm12, %xmm4
; WIN64-NEXT:    addps %xmm11, %xmm3
; WIN64-NEXT:    addps %xmm10, %xmm2
; WIN64-NEXT:    addps %xmm9, %xmm1
; WIN64-NEXT:    addps %xmm8, %xmm0
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm4
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm5
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm6
; WIN64-NEXT:    addps {{[0-9]+}}(%rsp), %xmm7
; WIN64-NEXT:    popq %rax
; WIN64-NEXT:    retq
;
; LINUXOSX-LABEL: testf32_stack:
; LINUXOSX:       # %bb.0:
; LINUXOSX-NEXT:    addps %xmm15, %xmm7
; LINUXOSX-NEXT:    addps %xmm14, %xmm6
; LINUXOSX-NEXT:    addps %xmm13, %xmm5
; LINUXOSX-NEXT:    addps %xmm12, %xmm4
; LINUXOSX-NEXT:    addps %xmm11, %xmm3
; LINUXOSX-NEXT:    addps %xmm10, %xmm2
; LINUXOSX-NEXT:    addps %xmm9, %xmm1
; LINUXOSX-NEXT:    addps %xmm8, %xmm0
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm0
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm1
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm2
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm3
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm4
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm5
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm6
; LINUXOSX-NEXT:    addps {{[0-9]+}}(%rsp), %xmm7
; LINUXOSX-NEXT:    retq
  %x1 = fadd <32 x float> %a, %b
  %x2 = fadd <32 x float> %x1, %c
  ret <32 x float> %x2
}