xref: /llvm-project/llvm/test/CodeGen/X86/statepoint-no-realign-stack.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mcpu=skylake < %s | FileCheck %s
3
4target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-pc-linux-gnu"
6
7; Shows a case where we spill a 32 byte value onto a stack which is only
8; 16 byte aligned.  With stack realignment, we can use an aligned spill slot
9; (if we think it's profitable), but without realignment, using a stack
10; slot which is 32 byte aligned or a store which expects 32 byte alignment
11; is incorrect.
12
13declare void @foo()
; can_realign: loads a <8 x i32> from %p (align 32) and keeps it live across
; the call to @foo through a "deopt" operand bundle, so the value has to be
; spilled to the stack around the call.  The function carries no
; "no-realign-stack" attribute, and the expected output below sets up %rbp,
; realigns %rsp with `andq $-32`, and spills with the aligned store form
; (vmovaps) to (%rsp).
14define void @can_realign(ptr %p) {
15; CHECK-LABEL: can_realign:
16; CHECK:       # %bb.0:
17; CHECK-NEXT:    pushq %rbp
18; CHECK-NEXT:    .cfi_def_cfa_offset 16
19; CHECK-NEXT:    .cfi_offset %rbp, -16
20; CHECK-NEXT:    movq %rsp, %rbp
21; CHECK-NEXT:    .cfi_def_cfa_register %rbp
22; CHECK-NEXT:    andq $-32, %rsp
23; CHECK-NEXT:    subq $64, %rsp
24; CHECK-NEXT:    vmovaps (%rdi), %ymm0
25; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
26; CHECK-NEXT:    vzeroupper
27; CHECK-NEXT:    callq foo@PLT
28; CHECK-NEXT:  .Ltmp0:
29; CHECK-NEXT:    movq %rbp, %rsp
30; CHECK-NEXT:    popq %rbp
31; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
32; CHECK-NEXT:    retq
33  %val = load <8 x i32>, ptr %p, align 32
34  call void @foo() ["deopt" (<8 x i32> %val)]
35  ret void
36}
37
; no_realign: same IR body as @can_realign, but the function is marked
; "no-realign-stack".  The expected output below has no %rbp frame setup and
; no `andq` on %rsp; it only adjusts %rsp by 40 bytes and spills the 32-byte
; value with the unaligned store form (vmovups).  The load from %p still uses
; vmovaps, matching the `align 32` on the IR load.
38define void @no_realign(ptr %p) "no-realign-stack" {
39; CHECK-LABEL: no_realign:
40; CHECK:       # %bb.0:
41; CHECK-NEXT:    subq $40, %rsp
42; CHECK-NEXT:    .cfi_def_cfa_offset 48
43; CHECK-NEXT:    vmovaps (%rdi), %ymm0
44; CHECK-NEXT:    vmovups %ymm0, (%rsp)
45; CHECK-NEXT:    vzeroupper
46; CHECK-NEXT:    callq foo@PLT
47; CHECK-NEXT:  .Ltmp1:
48; CHECK-NEXT:    addq $40, %rsp
49; CHECK-NEXT:    .cfi_def_cfa_offset 8
50; CHECK-NEXT:    retq
51  %val = load <8 x i32>, ptr %p, align 32
52  call void @foo() ["deopt" (<8 x i32> %val)]
53  ret void
54}
55
56;; Next batch are similar to the above, but require a reload of the
57;; spilled value as well.
58
; spillfill_can_realign: passes the incoming <4 x ptr addrspace(1)> %obj as a
; "gc-live" operand of a statepoint that calls @do_safepoint, then returns the
; gc.relocate result for it, so the vector is both spilled before the call and
; reloaded after it.  There is no "no-realign-stack" attribute here; the
; expected output below realigns %rsp with `andq $-32` and uses the aligned
; form (vmovaps) for both the spill and the reload.
59define <4 x ptr addrspace(1)> @spillfill_can_realign(<4 x ptr addrspace(1)> %obj) gc "statepoint-example" {
60; CHECK-LABEL: spillfill_can_realign:
61; CHECK:       # %bb.0: # %entry
62; CHECK-NEXT:    pushq %rbp
63; CHECK-NEXT:    .cfi_def_cfa_offset 16
64; CHECK-NEXT:    .cfi_offset %rbp, -16
65; CHECK-NEXT:    movq %rsp, %rbp
66; CHECK-NEXT:    .cfi_def_cfa_register %rbp
67; CHECK-NEXT:    andq $-32, %rsp
68; CHECK-NEXT:    subq $64, %rsp
69; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
70; CHECK-NEXT:    vzeroupper
71; CHECK-NEXT:    callq do_safepoint@PLT
72; CHECK-NEXT:  .Ltmp2:
73; CHECK-NEXT:    vmovaps (%rsp), %ymm0
74; CHECK-NEXT:    movq %rbp, %rsp
75; CHECK-NEXT:    popq %rbp
76; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
77; CHECK-NEXT:    retq
78entry:
79  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<4 x ptr addrspace(1)> %obj)]
80  %obj.relocated = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
81  ret <4 x ptr addrspace(1)> %obj.relocated
82}
83
; spillfill_no_realign: same statepoint/relocate body as
; @spillfill_can_realign, but the function is marked "no-realign-stack".  The
; expected output below does not realign %rsp (only a 40-byte adjustment) and
; uses the unaligned form (vmovups) for both the spill before the call and the
; reload after it.
84define <4 x ptr addrspace(1)> @spillfill_no_realign(<4 x ptr addrspace(1)> %obj) "no-realign-stack" gc "statepoint-example" {
85; CHECK-LABEL: spillfill_no_realign:
86; CHECK:       # %bb.0: # %entry
87; CHECK-NEXT:    subq $40, %rsp
88; CHECK-NEXT:    .cfi_def_cfa_offset 48
89; CHECK-NEXT:    vmovups %ymm0, (%rsp)
90; CHECK-NEXT:    vzeroupper
91; CHECK-NEXT:    callq do_safepoint@PLT
92; CHECK-NEXT:  .Ltmp3:
93; CHECK-NEXT:    vmovups (%rsp), %ymm0
94; CHECK-NEXT:    addq $40, %rsp
95; CHECK-NEXT:    .cfi_def_cfa_offset 8
96; CHECK-NEXT:    retq
97entry:
98  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<4 x ptr addrspace(1)> %obj)]
99  %obj.relocated = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
100  ret <4 x ptr addrspace(1)> %obj.relocated
101}
102
103declare void @do_safepoint()
104
105declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...)
106declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32)
107declare <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token, i32, i32)
108