; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -pass-remarks-missed=isel 2>&1 >/dev/null | FileCheck %s --check-prefix=STDERR --allow-empty
; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"

; The constant is the left-hand operand of the 'and' (non-canonical order);
; fast-isel is still expected to fold it into the instruction as an immediate.
define i32 @test1(i32 %i) nounwind ssp {
  %masked = and i32 8, %i
  ret i32 %masked
}

; CHECK-LABEL: test1:
; CHECK: andl $8,


; rdar://9289512 - The value reloaded from the stack slot should be folded
; straight into the compare as a memory operand.
define void @test2(i64 %x) nounwind ssp {
entry:
  %slot = alloca i64, align 8
  store i64 %x, ptr %slot, align 8
  %reloaded = load i64, ptr %slot, align 8
  %is.gt = icmp sgt i64 %reloaded, 42
  br i1 %is.gt, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
; CHECK-LABEL: test2:
; CHECK: movq %rdi, -8(%rsp)
; CHECK: cmpq $42, -8(%rsp)
}


; ptrtoint of an external global: the address is expected to be fetched
; through the GOT and returned immediately.
@G = external global i32
define i64 @test3() nounwind {
  %addr = ptrtoint ptr @G to i64
  ret i64 %addr
; CHECK-LABEL: test3:
; CHECK: movq _G@GOTPCREL(%rip), %rax
; CHECK-NEXT: ret
}


; rdar://9289558 - Indexed byte load from an external array: one GOT load,
; then a zero-extending load that uses the index register directly.
@rtx_length = external global [153 x i8]

define i32 @test4(i64 %idxprom9) nounwind {
  %elt.ptr = getelementptr inbounds [153 x i8], ptr @rtx_length, i32 0, i64 %idxprom9
  %byte = load i8, ptr %elt.ptr, align 1
  %wide = zext i8 %byte to i32
  ret i32 %wide

; CHECK-LABEL: test4:
; CHECK: movq _rtx_length@GOTPCREL(%rip), %rax
; CHECK-NEXT: movzbl (%rax,%rdi), %eax
; CHECK-NEXT: ret
}


; PR3242 - A shift amount larger than the bit width must not be folded by
; fast-isel; the amount is expected in %ecx with the shift taken from %cl.
define void @test5(i32 %x, ptr %p) nounwind {
  %shifted = ashr i32 %x, 50000
  store i32 %shifted, ptr %p
  ret void

; CHECK-LABEL: test5:
; CHECK: movl $50000, %ecx
; CHECK: sarl %cl, %edi
; CHECK: ret
}

; rdar://9289501 - Multiplying by a power of two should be selected as a
; left shift (64-bit variant here, 32-bit variant in test7).
define i64 @test6(i64 %x) nounwind ssp {
entry:
  %scaled = mul nsw i64 %x, 8
  ret i64 %scaled

; CHECK-LABEL: test6:
; CHECK: shlq $3, {{%r[a-z]+}}
}

define i32 @test7(i32 %x) nounwind ssp {
entry:
  %scaled = mul nsw i32 %x, 8
  ret i32 %scaled
; CHECK-LABEL: test7:
; CHECK: shll $3, {{%e[a-z]+}}
}


; rdar://9289507 - Small constants should be folded as immediates into
; 64-bit arithmetic (add here, multiply by a non-power-of-two in test9).
define i64 @test8(i64 %x) nounwind ssp {
entry:
  %sum = add nsw i64 %x, 7
  ret i64 %sum

; CHECK-LABEL: test8:
; CHECK: addq $7, {{%r[a-z]+}}
}

define i64 @test9(i64 %x) nounwind ssp {
entry:
  %prod = mul nsw i64 %x, 7
  ret i64 %prod
; CHECK-LABEL: test9:
; CHECK: imulq $7, %rdi, %rax
}

; rdar://9297011 - An unsigned divide by a power of two must not be rejected;
; a logical right shift is expected.
define i32 @test10(i32 %X) nounwind {
  %quot = udiv i32 %X, 8
  ret i32 %quot
; CHECK-LABEL: test10:
; CHECK: shrl $3,
}

; Signed counterpart: an 'exact' sdiv by 8 is expected to become an
; arithmetic right shift.
define i32 @test11(i32 %X) nounwind {
  %quot = sdiv exact i32 %X, 8
  ret i32 %quot
; CHECK-LABEL: test11:
; CHECK: sarl $3,
}


; rdar://9297006 - Truncation of an i8 to a bool (i1).
define void @test12(i8 %tmp) nounwind ssp noredzone {
entry:
  ; The i1 produced by the truncation feeds the branch; a single-bit test
  ; followed by a conditional jump is expected.
  %flag = trunc i8 %tmp to i1
  br i1 %flag, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  call void @test12(i8 0) noredzone
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
; CHECK-LABEL: test12:
; CHECK: testb $1,
; CHECK-NEXT: je L
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq
}

declare void @test13f(i1 %X)

; Passing a constant false i1: the argument register is expected to be
; zeroed with xor right before the call.
define void @test13() nounwind {
  call void @test13f(i1 0)
  ret void
; CHECK-LABEL: test13:
; CHECK: xorl %edi, %edi
; CHECK-NEXT: callq
}



; rdar://9297003 - fast-isel used to bail out on every function taking a
; bool; here a zero-extended i1 argument is passed to a call.
define void @test14(i8 %tmp) nounwind ssp noredzone {
entry:
  %flag = trunc i8 %tmp to i1
  call void @test13f(i1 zeroext %flag) noredzone
  ret void
; CHECK-LABEL: test14:
; CHECK: andb $1,
; CHECK: callq
}

declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

; rdar://9289488 - fast-isel must not bail out on llvm.memcpy; a 4-byte copy
; with 4-byte alignment is expected to become one load and one store.
define void @test15(ptr %a, ptr %b) nounwind {
  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %a, ptr align 4 %b, i64 4, i1 false)
  ret void
; CHECK-LABEL: test15:
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: ret
}

; Calls to a variadic callee: %al is expected to hold 0 for the integer-only
; call and 1 for the call that passes one double. The +avx run expects the
; double to be loaded with vmovsd.
declare void @test16callee(...) nounwind
define void @test16() nounwind {
; CHECK-LABEL: test16:
; CHECK: movl $1, %edi
; CHECK: movb $0, %al
; CHECK: callq _test16callee
  call void (...) @test16callee(i32 1)
  br label %block2

block2:
; CHECK: movsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; CHECK: movb $1, %al
; CHECK: callq _test16callee

; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; AVX: movb $1, %al
; AVX: callq _test16callee
  call void (...) @test16callee(double 1.000000e+00)
  ret void
}


declare void @foo() unnamed_addr ssp align 2

; The load must NOT be folded into the compare here: the compare is emitted
; after the call to @foo, so folding would move the load across that call.
define i32 @test17(ptr %P) ssp nounwind {
entry:
  %loaded = load i32, ptr %P
  %differs = icmp ne i32 %loaded, 5
  call void @foo()
  br i1 %differs, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  ret i32 1

if.else:                                          ; preds = %entry
  ret i32 2
; CHECK-LABEL: test17:
; CHECK: movl (%rdi), %eax
; CHECK: callq _foo
; CHECK: cmpl $5, %eax
; CHECK-NEXT: je
}

; A float +0.0 constant should be materialized with xorps.
define void @test18(ptr %p1) {
  store float 0.0, ptr %p1
  ret void
; CHECK-LABEL: test18:
; CHECK: xorps
}

; With no type hints available, a double +0.0 also uses the smaller xorps
; encoding rather than xorpd.
define void @test19(ptr %p1) {
  store double 0.0, ptr %p1
  ret void
; CHECK-LABEL: test19:
; CHECK: xorps
}

; An sret call should be handled by fast-isel; the address of the stack
; temporary is expected in %rdi.
%struct.a = type { i64, i64, i64 }
define void @test20() nounwind ssp {
entry:
  %agg = alloca %struct.a, align 8
  call void @test20sret(ptr sret(%struct.a) %agg)
  ret void
; CHECK-LABEL: test20:
; CHECK: movq %rsp, %rdi
; CHECK: callq _test20sret
}
declare void @test20sret(ptr sret(%struct.a))

; Negative zero must not be materialized with an xor; a constant-pool load
; is expected instead.
define void @test21(ptr %p1) {
  store double -0.0, ptr %p1
  ret void
; CHECK-LABEL: test21:
; CHECK-NOT: xor
; CHECK: movsd LCPI
}

; Immediate arguments to a function should not cause massive spilling;
; each one should be materialized as an immediate just before its call.
define void @test22() nounwind {
entry:
  ; Four back-to-back calls, each with a different constant argument; the
  ; expected output sets %edi immediately ahead of every call.
  call void @foo22(i32 0)
  call void @foo22(i32 1)
  call void @foo22(i32 2)
  call void @foo22(i32 3)
  ret void
; CHECK-LABEL: test22:
; CHECK: xorl %edi, %edi
; CHECK: callq _foo22
; CHECK: movl $1, %edi
; CHECK: callq _foo22
; CHECK: movl $2, %edi
; CHECK: callq _foo22
; CHECK: movl $3, %edi
; CHECK: callq _foo22
}

declare void @foo22(i32)

; PR13563 - The incoming sret pointer is expected to be saved to a stack
; slot before the call and reloaded into %rax before returning.
define void @test23(ptr noalias sret(i8) %result) {
  %scratch = alloca i8
  %unused = call ptr @foo23()
  ret void
; CHECK-LABEL: test23:
; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]]
; CHECK: call
; CHECK-NEXT: movq [[STACK]], %rax
; CHECK-NEXT: addq $24, %rsp
; CHECK: ret
}

declare ptr @foo23()

declare void @takesi32ptr(ptr %arg)

; Passing the address of an alloca as a call argument: the address is
; expected to be formed with leaq directly into %rdi.
; CHECK-LABEL: allocamaterialize
define void @allocamaterialize() {
  %slot = alloca i32
; CHECK: leaq {{.*}}, %rdi
  call void @takesi32ptr(ptr %slot)
  ret void
}

; A win64cc function returning void: the missed-isel remarks run must not
; report the return terminator as missed.
; STDERR-NOT: FastISel missed terminator: ret void
; CHECK-LABEL: win64ccfun
define win64cc void @win64ccfun(i32 %i) {
; CHECK: ret
  ret void
}