; RUN: llc -mtriple=i686-windows -mattr=+sse2 < %s | FileCheck %s

target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
target triple = "i686-pc-windows-msvc"

; There is a conflict between lowering the X86 memory intrinsics and the "base"
; register used to address stack locals. See X86RegisterInfo::hasBaseRegister
; for when this is necessary. Typically, we choose ESI for the base register,
; which all of the X86 string instructions use.

; External sink that lets the VLA pointer and the comparison result escape.
declare void @escape_vla_and_icmp(ptr, i1 zeroext)
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture readonly, i32, i1)
declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1)

; A 16-byte-aligned local but no variable-sized alloca. The directives below
; expect the 128-byte memcpy to be expanded inline as "rep;movsl", with ECX
; loaded with 32 and both ESI and EDI set up for the string instruction.
define i32 @memcpy_novla_vector(ptr %vp0, ptr %a, ptr %b, i32 %n, i1 zeroext %cond) {
  %foo = alloca <4 x i32>, align 16
  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 4 %b, i32 128, i1 false)
  br i1 %cond, label %spill_vectors, label %no_vectors

no_vectors:
  ret i32 0

spill_vectors:
  %vp1 = getelementptr <4 x i32>, ptr %vp0, i32 1
  %v0 = load <4 x i32>, ptr %vp0
  %v1 = load <4 x i32>, ptr %vp1
  %vicmp = icmp slt <4 x i32> %v0, %v1
  %icmp = extractelement <4 x i1> %vicmp, i32 0
  call void @escape_vla_and_icmp(ptr null, i1 zeroext %icmp)
  %r = extractelement <4 x i32> %v0, i32 0
  ret i32 %r
}

; CHECK-LABEL: _memcpy_novla_vector:
; CHECK: andl $-16, %esp
; CHECK-DAG: movl $32, %ecx
; CHECK-DAG: movl {{.*}}, %esi
; CHECK-DAG: movl {{.*}}, %edi
; CHECK: rep;movsl

; Same body as above, plus a variable-sized alloca (%vla) in the vector path.
; The directives below expect ESP to be copied into ESI after realignment and
; the memcpy to be emitted as a call to _memcpy rather than a string
; instruction, followed by the __chkstk call for the dynamic allocation.
define i32 @memcpy_vla_vector(ptr %vp0, ptr %a, ptr %b, i32 %n, i1 zeroext %cond) {
  %foo = alloca <4 x i32>, align 16
  call void @llvm.memcpy.p0.p0.i32(ptr align 4 %a, ptr align 4 %b, i32 128, i1 false)
  br i1 %cond, label %spill_vectors, label %no_vectors

no_vectors:
  ret i32 0

spill_vectors:
  %vp1 = getelementptr <4 x i32>, ptr %vp0, i32 1
  %v0 = load <4 x i32>, ptr %vp0
  %v1 = load <4 x i32>, ptr %vp1
  %vicmp = icmp slt <4 x i32> %v0, %v1
  %icmp = extractelement <4 x i1> %vicmp, i32 0
  %vla = alloca i8, i32 %n
  call void @escape_vla_and_icmp(ptr %vla, i1 zeroext %icmp)
  %r = extractelement <4 x i32> %v0, i32 0
  ret i32 %r
}

; CHECK-LABEL: _memcpy_vla_vector:
; CHECK: andl $-16, %esp
; CHECK: movl %esp, %esi
; CHECK: pushl $128
; CHECK: calll _memcpy
; CHECK: calll __chkstk

; stosd doesn't clobber esi, so we can use it.

; memset counterpart of the VLA case: a 128-byte fill with the byte 42 (0x2A).
; The directives below expect an inline "rep;stosl" with EAX, ECX and EDI set
; up, and no further write to ESI once it has received the copy of ESP.
define i32 @memset_vla_vector(ptr %vp0, ptr %a, i32 %n, i1 zeroext %cond) {
  %foo = alloca <4 x i32>, align 16
  call void @llvm.memset.p0.i32(ptr align 4 %a, i8 42, i32 128, i1 false)
  br i1 %cond, label %spill_vectors, label %no_vectors

no_vectors:
  ret i32 0

spill_vectors:
  %vp1 = getelementptr <4 x i32>, ptr %vp0, i32 1
  %v0 = load <4 x i32>, ptr %vp0
  %v1 = load <4 x i32>, ptr %vp1
  %vicmp = icmp slt <4 x i32> %v0, %v1
  %icmp = extractelement <4 x i1> %vicmp, i32 0
  %vla = alloca i8, i32 %n
  call void @escape_vla_and_icmp(ptr %vla, i1 zeroext %icmp)
  %r = extractelement <4 x i32> %v0, i32 0
  ret i32 %r
}

; CHECK-LABEL: _memset_vla_vector:
; CHECK: andl $-16, %esp
; CHECK: movl %esp, %esi
; CHECK-DAG: movl $707406378, %eax # imm = 0x2A2A2A2A
; CHECK-DAG: movl $32, %ecx
; CHECK-DAG: movl {{.*}}, %edi
; CHECK-NOT: movl {{.*}}, %esi
; CHECK: rep;stosl

; Add a test for memcmp if we ever add a special lowering for it.