xref: /llvm-project/llvm/test/CodeGen/X86/fast-isel-x86-64.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s
2; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -pass-remarks-missed=isel 2>&1 >/dev/null | FileCheck %s --check-prefix=STDERR --allow-empty
3; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX
4
5target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
6target triple = "x86_64-apple-darwin10.0.0"
7
8; Make sure that fast-isel folds the immediate into the binop even though it
9; is non-canonical (the constant 8 is the left-hand operand of the 'and').
; The expected output is an andl whose source operand is the immediate $8.
10define i32 @test1(i32 %i) nounwind ssp {
11  %and = and i32 8, %i
12  ret i32 %and
13}
14
15; CHECK-LABEL: test1:
16; CHECK: andl	$8,
17
18
19; rdar://9289512 - The load should fold into the compare.
; %x is stored to an alloca and reloaded right away. The expected output
; stores %rdi to -8(%rsp) and then compares that stack slot against 42 with a
; memory-operand cmpq.
20define void @test2(i64 %x) nounwind ssp {
21entry:
22  %x.addr = alloca i64, align 8
23  store i64 %x, ptr %x.addr, align 8
24  %tmp = load i64, ptr %x.addr, align 8
25  %cmp = icmp sgt i64 %tmp, 42
26  br i1 %cmp, label %if.then, label %if.end
27
28if.then:                                          ; preds = %entry
29  br label %if.end
30
31if.end:                                           ; preds = %if.then, %entry
32  ret void
33; CHECK-LABEL: test2:
34; CHECK: movq	%rdi, -8(%rsp)
35; CHECK: cmpq	$42, -8(%rsp)
36}
37
38
39
40
; ptrtoint of an external global: the expected output loads the address of @G
; through the GOT (GOTPCREL, %rip-relative) into %rax, with the ret on the
; very next line.
41@G = external global i32
42define i64 @test3() nounwind {
43  %A = ptrtoint ptr @G to i64
44  ret i64 %A
45; CHECK-LABEL: test3:
46; CHECK: movq _G@GOTPCREL(%rip), %rax
47; CHECK-NEXT: ret
48}
49
50
51
52; rdar://9289558
; Byte load from an external global array at a variable index. The expected
; output loads the array address through the GOT and then folds the index
; (%rdi) into the addressing mode of a zero-extending byte load (movzbl),
; followed directly by ret.
53@rtx_length = external global [153 x i8]
54
55define i32 @test4(i64 %idxprom9) nounwind {
56  %arrayidx10 = getelementptr inbounds [153 x i8], ptr @rtx_length, i32 0, i64 %idxprom9
57  %tmp11 = load i8, ptr %arrayidx10, align 1
58  %conv = zext i8 %tmp11 to i32
59  ret i32 %conv
60
61; CHECK-LABEL: test4:
62; CHECK: movq	_rtx_length@GOTPCREL(%rip), %rax
63; CHECK-NEXT: movzbl	(%rax,%rdi), %eax
64; CHECK-NEXT: ret
65}
66
67
68; PR3242 - Out of range shifts should not be folded by fastisel.
; The shift amount 50000 is larger than the i32 bit width. The expected output
; materializes it in %ecx and shifts by %cl instead of using an immediate
; shift count.
69define void @test5(i32 %x, ptr %p) nounwind {
70  %y = ashr i32 %x, 50000
71  store i32 %y, ptr %p
72  ret void
73
74; CHECK-LABEL: test5:
75; CHECK: movl	$50000, %ecx
76; CHECK: sarl	%cl, %edi
77; CHECK: ret
78}
79
80; rdar://9289501 - fast isel should fold trivial multiplies to shifts.
; 64-bit case: multiplying by 8 is expected to become a left shift by 3
; (shlq $3) on some 64-bit register.
81define i64 @test6(i64 %x) nounwind ssp {
82entry:
83  %mul = mul nsw i64 %x, 8
84  ret i64 %mul
85
86; CHECK-LABEL: test6:
87; CHECK: shlq	$3, {{%r[a-z]+}}
88}
89
; 32-bit counterpart of test6: multiplying an i32 by 8 is expected to become
; a left shift by 3 (shll $3) on some 32-bit register.
90define i32 @test7(i32 %x) nounwind ssp {
91entry:
92  %mul = mul nsw i32 %x, 8
93  ret i32 %mul
94; CHECK-LABEL: test7:
95; CHECK: shll	$3, {{%e[a-z]+}}
96}
97
98
99; rdar://9289507 - folding of immediates into 64-bit operations.
; 64-bit add of the constant 7: the expected output is an addq whose source
; operand is the immediate $7 and whose destination is some 64-bit register.
100define i64 @test8(i64 %x) nounwind ssp {
101entry:
102  %add = add nsw i64 %x, 7
103  ret i64 %add
104
105; CHECK-LABEL: test8:
106; CHECK: addq	$7, {{%r[a-z]+}}
107}
108
; 64-bit multiply by the constant 7, which is not a power of two, so no shift
; is expected here. The expected output is the three-operand imulq with the
; immediate $7, reading %rdi and writing %rax.
109define i64 @test9(i64 %x) nounwind ssp {
110entry:
111  %mul = mul nsw i64 %x, 7
112  ret i64 %mul
113; CHECK-LABEL: test9:
114; CHECK: imulq	$7, %rdi, %rax
115}
116
117; rdar://9297011 - Don't reject udiv by a power of 2.
; Unsigned division by 8 is expected to become a logical right shift by 3
; (shrl $3).
118define i32 @test10(i32 %X) nounwind {
119  %Y = udiv i32 %X, 8
120  ret i32 %Y
121; CHECK-LABEL: test10:
122; CHECK: shrl	$3,
123}
124
; Signed counterpart of test10: an 'sdiv exact' by 8 is expected to become an
; arithmetic right shift by 3 (sarl $3).
125define i32 @test11(i32 %X) nounwind {
126  %Y = sdiv exact i32 %X, 8
127  ret i32 %Y
128; CHECK-LABEL: test11:
129; CHECK: sarl	$3,
130}
131
132
133; rdar://9297006 - Trunc to bool.
; An i8 truncated to i1 feeds a conditional branch. The expected output tests
; bit 0 with testb $1, then, on consecutive lines, a je to a local label, an
; xorl that zeroes %edi for the recursive call's constant 0 argument, and the
; callq itself.
134define void @test12(i8 %tmp) nounwind ssp noredzone {
135entry:
136  %tobool = trunc i8 %tmp to i1
137  br i1 %tobool, label %if.then, label %if.end
138
139if.then:                                          ; preds = %entry
140  call void @test12(i8 0) noredzone
141  br label %if.end
142
143if.end:                                           ; preds = %if.then, %entry
144  ret void
145; CHECK-LABEL: test12:
146; CHECK: testb	$1,
147; CHECK-NEXT: je L
148; CHECK-NEXT: xorl %edi, %edi
149; CHECK-NEXT: callq
150}
151
; External callee taking a single i1; used by test13 and test14.
152declare void @test13f(i1 %X)
153
; Passing the constant i1 0: the expected output zeroes %edi with xorl and
; issues the callq on the very next line.
154define void @test13() nounwind {
155  call void @test13f(i1 0)
156  ret void
157; CHECK-LABEL: test13:
158; CHECK: xorl %edi, %edi
159; CHECK-NEXT: callq
160}
161
162
163
164; rdar://9297003 - fast isel bails out on all functions taking bools
; An i1 produced by trunc is passed as a zeroext argument. The expected output
; masks the value with andb $1 at some point before the callq.
165define void @test14(i8 %tmp) nounwind ssp noredzone {
166entry:
167  %tobool = trunc i8 %tmp to i1
168  call void @test13f(i1 zeroext %tobool) noredzone
169  ret void
170; CHECK-LABEL: test14:
171; CHECK: andb	$1,
172; CHECK: callq
173}
174
175declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
176
177; rdar://9289488 - fast-isel shouldn't bail out on llvm.memcpy
; A 4-byte memcpy with 4-byte aligned operands. The expected output directly
; after the function label is a 32-bit load from (%rsi), a 32-bit store to
; (%rdi) and ret, with nothing in between (no call to a memcpy routine).
178define void @test15(ptr %a, ptr %b) nounwind {
179  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %b, i64 4, i1 false)
180  ret void
181; CHECK-LABEL: test15:
182; CHECK-NEXT: movl	(%rsi), %eax
183; CHECK-NEXT: movl	%eax, (%rdi)
184; CHECK-NEXT: ret
185}
186
187; Handling for varargs calls
; Two calls to a variadic callee. For the i32 argument the expected output
; passes 1 in %edi and sets %al to 0; for the double argument it loads the
; constant into %xmm0 from an LCP label and sets %al to 1. On SysV x86-64,
; %al carries the number of vector registers used by a variadic call. The
; +avx run expects the same sequence with vmovsd in place of movsd.
188declare void @test16callee(...) nounwind
189define void @test16() nounwind {
190; CHECK-LABEL: test16:
191; CHECK: movl $1, %edi
192; CHECK: movb $0, %al
193; CHECK: callq _test16callee
194  call void (...) @test16callee(i32 1)
195  br label %block2
196
197block2:
198; CHECK: movsd LCP{{.*}}_{{.*}}(%rip), %xmm0
199; CHECK: movb $1, %al
200; CHECK: callq _test16callee
201
202; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
203; AVX: movb $1, %al
204; AVX: callq _test16callee
205  call void (...) @test16callee(double 1.000000e+00)
206  ret void
207}
208
209
; External function called between the load and the branch in test17.
210declare void @foo() unnamed_addr ssp align 2
211
212; Verify that we don't fold the load into the compare here.  That would move it
213; w.r.t. the call.
; The expected output loads (%rdi) into %eax before the callq to _foo and
; compares %eax against 5 afterwards, with the je on the line right after the
; cmpl.
214define i32 @test17(ptr%P) ssp nounwind {
215entry:
216  %tmp = load i32, ptr %P
217  %cmp = icmp ne i32 %tmp, 5
218  call void @foo()
219  br i1 %cmp, label %if.then, label %if.else
220
221if.then:                                          ; preds = %entry
222  ret i32 1
223
224if.else:                                          ; preds = %entry
225  ret i32 2
226; CHECK-LABEL: test17:
227; CHECK: movl	(%rdi), %eax
228; CHECK: callq _foo
229; CHECK: cmpl	$5, %eax
230; CHECK-NEXT: je
231}
232
233; Check that 0.0 is materialized using xorps
; Single-precision case: stores float 0.0 through %p1.
234define void @test18(ptr %p1) {
235  store float 0.0, ptr %p1
236  ret void
237; CHECK-LABEL: test18:
238; CHECK: xorps
239}
240
241; Without any type hints, doubles use the smaller xorps instead of xorpd.
; Double-precision counterpart of test18: stores double 0.0 through %p1 and
; still expects xorps.
242define void @test19(ptr %p1) {
243  store double 0.0, ptr %p1
244  ret void
245; CHECK-LABEL: test19:
246; CHECK: xorps
247}
248
249; Check that we fast-isel sret
; A local %struct.a is passed as the sret argument. The expected output
; copies %rsp into %rdi and then calls _test20sret.
250%struct.a = type { i64, i64, i64 }
251define void @test20() nounwind ssp {
252entry:
253  %tmp = alloca %struct.a, align 8
254  call void @test20sret(ptr sret(%struct.a) %tmp)
255  ret void
256; CHECK-LABEL: test20:
257; CHECK: movq %rsp, %rdi
258; CHECK: callq _test20sret
259}
260declare void @test20sret(ptr sret(%struct.a))
261
262; Check that -0.0 is not materialized using xor
; The expected output loads the constant with movsd from an LCPI label, and
; no xor may appear between the function label and that movsd.
263define void @test21(ptr %p1) {
264  store double -0.0, ptr %p1
265  ret void
266; CHECK-LABEL: test21:
267; CHECK-NOT: xor
268; CHECK: movsd	LCPI
269}
270
271; Check that immediate arguments to a function
272; do not cause massive spilling and are used
273; as immediates just before the call.
; Four calls with the constants 0..3: each constant is expected to be placed
; in %edi (xorl for 0, movl with an immediate for 1, 2 and 3) ahead of its own
; callq, in call order.
274define void @test22() nounwind {
275entry:
276  call void @foo22(i32 0)
277  call void @foo22(i32 1)
278  call void @foo22(i32 2)
279  call void @foo22(i32 3)
280  ret void
281; CHECK-LABEL: test22:
282; CHECK: xorl	%edi, %edi
283; CHECK: callq	_foo22
284; CHECK: movl	$1, %edi
285; CHECK: callq	_foo22
286; CHECK: movl	$2, %edi
287; CHECK: callq	_foo22
288; CHECK: movl	$3, %edi
289; CHECK: callq	_foo22
290}
291
; External callee taking a single i32; used by test22.
292declare void @foo22(i32)
293
294; PR13563
; Function with an sret parameter that makes a call before returning. The
; expected output saves the incoming %rdi to a stack slot before the call,
; reloads that same slot into %rax on the line right after the call, and then
; releases the frame with addq $24, %rsp before the ret.
295define void @test23(ptr noalias sret(i8) %result) {
296  %a = alloca i8
297  %b = call ptr @foo23()
298  ret void
299; CHECK-LABEL: test23:
300; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]]
301; CHECK: call
302; CHECK-NEXT: movq [[STACK]], %rax
303; CHECK-NEXT: addq $24, %rsp
304; CHECK: ret
305}
306
; External callee returning a pointer; used by test23.
307declare ptr @foo23()
308
; External callee taking a pointer; used by allocamaterialize.
309declare void @takesi32ptr(ptr %arg)
310
; The address of a local alloca is passed as a call argument. The expected
; output materializes that address into %rdi with leaq.
; The label pattern ends with a colon, like the other functions in this file,
; so it anchors on the label definition only.
311; CHECK-LABEL: allocamaterialize:
312define void @allocamaterialize() {
313  %a = alloca i32
314; CHECK: leaq {{.*}}, %rdi
315  call void @takesi32ptr(ptr %a)
316  ret void
317}
318
; A win64cc function whose body is only 'ret void'. The stderr run (which
; enables -pass-remarks-missed=isel) must not contain a FastISel
; missed-terminator remark for 'ret void', and the main run expects a ret.
; The label pattern ends with a colon, like the other functions in this file,
; so it anchors on the label definition only.
319; STDERR-NOT: FastISel missed terminator:   ret void
320; CHECK-LABEL: win64ccfun:
321define win64cc void @win64ccfun(i32 %i) {
322; CHECK: ret
323  ret void
324}
325