xref: /llvm-project/llvm/test/CodeGen/X86/statepoint-vector.ll (revision a21abc782a8e1cb718a10c471a3b634f3102fc1c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
3; REQUIRES: asserts
4
5target triple = "x86_64-pc-linux-gnu"
6
7; Can we lower a single vector?
; @test passes one <2 x ptr addrspace(1)> value as the only "gc-live" operand of
; a statepoint that calls @do_safepoint, relocates it (base index 0, derived
; index 0) and returns the relocated vector.  The expected code stores the
; vector to (%rsp) before the call and reloads it from the same slot afterwards.
8define <2 x ptr addrspace(1)> @test(<2 x ptr addrspace(1)> %obj) gc "statepoint-example" {
9; CHECK-LABEL: test:
10; CHECK:       # %bb.0: # %entry
11; CHECK-NEXT:    subq $24, %rsp
12; CHECK-NEXT:    .cfi_def_cfa_offset 32
13; CHECK-NEXT:    movaps %xmm0, (%rsp)
14; CHECK-NEXT:    callq do_safepoint@PLT
15; CHECK-NEXT:  .Ltmp0:
16; CHECK-NEXT:    movaps (%rsp), %xmm0
17; CHECK-NEXT:    addq $24, %rsp
18; CHECK-NEXT:    .cfi_def_cfa_offset 8
19; CHECK-NEXT:    retq
20entry:
21  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj)]
22  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
23  ret <2 x ptr addrspace(1)> %obj.relocated
24}
25
26; Can we lower the base, derived pairs if both are vectors?
; %derived is %obj advanced by %offset bytes (a vector getelementptr on i8).
; Both vectors are "gc-live" across the statepoint; only the derived one
; (relocate indices 0, 1) is relocated and returned.  The expected code stores
; %obj at (%rsp) and the derived vector in a second slot, then reloads the
; result from a slot at a numeric offset from %rsp.
27define <2 x ptr addrspace(1)> @test2(<2 x ptr addrspace(1)> %obj, i64 %offset) gc "statepoint-example" {
28; CHECK-LABEL: test2:
29; CHECK:       # %bb.0: # %entry
30; CHECK-NEXT:    subq $40, %rsp
31; CHECK-NEXT:    .cfi_def_cfa_offset 48
32; CHECK-NEXT:    movq %rdi, %xmm1
33; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
34; CHECK-NEXT:    paddq %xmm0, %xmm1
35; CHECK-NEXT:    movdqa %xmm0, (%rsp)
36; CHECK-NEXT:    movdqa %xmm1, {{[0-9]+}}(%rsp)
37; CHECK-NEXT:    callq do_safepoint@PLT
38; CHECK-NEXT:  .Ltmp1:
39; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
40; CHECK-NEXT:    addq $40, %rsp
41; CHECK-NEXT:    .cfi_def_cfa_offset 8
42; CHECK-NEXT:    retq
43entry:
44  %derived = getelementptr i8, <2 x ptr addrspace(1)> %obj, i64 %offset
45  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj, <2 x ptr addrspace(1)> %derived)]
46  %derived.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 1) ; (%obj, %derived)
47  ret <2 x ptr addrspace(1)> %derived.relocated
48}
49
50; Originally, this was just a variant of @test2 above, but it ends up
51; covering a bunch of interesting missed optimizations.  Specifically:
52; - We waste a stack slot for a value that a backend transform pass
53;   CSEd to another spilled one.
54; - We don't remove the testb even though it serves no purpose
55; - We could in principle reuse the argument memory (%rsi) and do away
56;   with stack slots entirely.
; Both branches load the same vector type from %ptr, and the two phis in %merge
; have identical incoming values, so %obj and %obj.base carry the same value.
; The expected code performs a single load from (%rsi) but still stores it to
; two stack slots, and keeps the testb on %dil.
57define <2 x ptr addrspace(1)> @test3(i1 %cnd, ptr %ptr) gc "statepoint-example" {
58; CHECK-LABEL: test3:
59; CHECK:       # %bb.0: # %entry
60; CHECK-NEXT:    testb $1, %dil
61; CHECK-NEXT:    movaps (%rsi), %xmm0
62; CHECK-NEXT:    subq $40, %rsp
63; CHECK-NEXT:    .cfi_def_cfa_offset 48
64; CHECK-NEXT:    movaps %xmm0, (%rsp)
65; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
66; CHECK-NEXT:    callq do_safepoint@PLT
67; CHECK-NEXT:  .Ltmp2:
68; CHECK-NEXT:    movaps (%rsp), %xmm0
69; CHECK-NEXT:    addq $40, %rsp
70; CHECK-NEXT:    .cfi_def_cfa_offset 8
71; CHECK-NEXT:    retq
72entry:
73  br i1 %cnd, label %taken, label %untaken
74
75taken:                                            ; preds = %entry
76  %obja = load <2 x ptr addrspace(1)>, ptr %ptr
77  br label %merge
78
79untaken:                                          ; preds = %entry
80  %objb = load <2 x ptr addrspace(1)>, ptr %ptr
81  br label %merge
82
83merge:                                            ; preds = %untaken, %taken
84  %obj.base = phi <2 x ptr addrspace(1)> [ %obja, %taken ], [ %objb, %untaken ]
85  %obj = phi <2 x ptr addrspace(1)> [ %obja, %taken ], [ %objb, %untaken ]
86  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj, <2 x ptr addrspace(1)> %obj.base)]
87  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 1, i32 0) ; (%obj.base, %obj)
88  %obj.relocated.casted = bitcast <2 x ptr addrspace(1)> %obj.relocated to <2 x ptr addrspace(1)> ; no-op: source and destination types are identical
89  %obj.base.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 1, i32 1) ; (%obj.base, %obj.base)
90  %obj.base.relocated.casted = bitcast <2 x ptr addrspace(1)> %obj.base.relocated to <2 x ptr addrspace(1)> ; no-op cast; the result is not used
91  ret <2 x ptr addrspace(1)> %obj.relocated.casted
92}
93
94; Can we handle vector constants?  At the moment, we don't appear to actually
95; get selection dag nodes for these.
; The only "gc-live" operand is a zeroinitializer vector.  The expected code
; zeroes %xmm0 with xorps, stores it to (%rsp) before the call and reloads it
; from the same slot for the return value.
96define <2 x ptr addrspace(1)> @test4() gc "statepoint-example" {
97; CHECK-LABEL: test4:
98; CHECK:       # %bb.0: # %entry
99; CHECK-NEXT:    subq $24, %rsp
100; CHECK-NEXT:    .cfi_def_cfa_offset 32
101; CHECK-NEXT:    xorps %xmm0, %xmm0
102; CHECK-NEXT:    movaps %xmm0, (%rsp)
103; CHECK-NEXT:    callq do_safepoint@PLT
104; CHECK-NEXT:  .Ltmp3:
105; CHECK-NEXT:    movaps (%rsp), %xmm0
106; CHECK-NEXT:    addq $24, %rsp
107; CHECK-NEXT:    .cfi_def_cfa_offset 8
108; CHECK-NEXT:    retq
109entry:
110  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> zeroinitializer)]
111  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 0)
112  ret <2 x ptr addrspace(1)> %obj.relocated
113}
114
115; Check that we can lower a constant typed as i128 correctly.  We don't have
116; a representation of larger than 64 bit constant in the StackMap format. At
117; the moment, this simply means spilling them, but there's a potential
118; optimization for values representable as sext(Con64).
; The statepoint carries two "deopt" operands, i128 0 and i128 -1, and has no
; relocates.  The expected code writes one 16-byte zero with xorps/movaps and
; two 8-byte -1 stores with movq before the call; nothing is reloaded afterwards.
119define void @test5() gc "statepoint-example" {
120; CHECK-LABEL: test5:
121; CHECK:       # %bb.0: # %entry
122; CHECK-NEXT:    subq $40, %rsp
123; CHECK-NEXT:    .cfi_def_cfa_offset 48
124; CHECK-NEXT:    xorps %xmm0, %xmm0
125; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
126; CHECK-NEXT:    movq $-1, {{[0-9]+}}(%rsp)
127; CHECK-NEXT:    movq $-1, (%rsp)
128; CHECK-NEXT:    callq do_safepoint@PLT
129; CHECK-NEXT:  .Ltmp4:
130; CHECK-NEXT:    addq $40, %rsp
131; CHECK-NEXT:    .cfi_def_cfa_offset 8
132; CHECK-NEXT:    retq
133entry:
134  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["deopt" (i128 0, i128 -1)]
135  ret void
136}
137
138; CHECK: __LLVM_StackMaps:
139
140; CHECK: .Ltmp0-test
141; Check for the two location records; both refer to the same spill slot (offset 0)
142; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
143; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
144; CHECK: .byte	3
145; CHECK: .byte	0
146; CHECK: .short 16
147; CHECK: .short	7
148; CHECK: .short	0
149; CHECK: .long	0
150; CHECK: .byte	3
151; CHECK: .byte	0
152; CHECK: .short 16
153; CHECK: .short	7
154; CHECK: .short	0
155; CHECK: .long	0
156
157; CHECK: .Ltmp1-test2
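158; Check for the two spill slots
; NOTE(review): the two quoted dump lines below list offset 16 before offset 0,
; while the directives that follow expect .long 0 first and .long 16 second.
; The quoted dump looks stale -- confirm against current -debug-only=stackmaps output.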
159; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
160; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
161; CHECK: .byte	3
162; CHECK: .byte	0
163; CHECK: .short 16
164; CHECK: .short	7
165; CHECK: .short	0
166; CHECK: .long	0
167; CHECK: .byte	3
168; CHECK: .byte	0
169; CHECK: .short 16
170; CHECK: .short	7
171; CHECK: .short	0
172; CHECK: .long	16
173
174; CHECK: .Ltmp2-test3
175; Check for the four location records (the code above only fills two stack slots)
176; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
177; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
178; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
179; Stack Maps: 		Loc 6: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
180; CHECK: .byte	3
181; CHECK: .byte	0
182; CHECK: .short 16
183; CHECK: .short	7
184; CHECK: .short	0
185; CHECK: .long	16
186; CHECK: .byte	3
187; CHECK: .byte	 0
188; CHECK: .short 16
189; CHECK: .short	7
190; CHECK: .short	0
191; CHECK: .long	16
192; CHECK: .byte	3
193; CHECK: .byte	 0
194; CHECK: .short 16
195; CHECK: .short	7
196; CHECK: .short	0
197; CHECK: .long	16
198; CHECK: .byte	3
199; CHECK: .byte	 0
200; CHECK: .short 16
201; CHECK: .short	7
202; CHECK: .short	0
203; CHECK: .long	0
204
; Callee invoked through every statepoint in this file.
205declare void @do_safepoint()
206
; Statepoint intrinsic and the relocate overloads.
207declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...)
; NOTE(review): the scalar .p1 overload is not referenced by any function visible
; in this file; only the vector .v2p1 overload is called.
208declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32)
209declare <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token, i32, i32)
210