; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s

; How this test is driven: the run line above compiles this file with llc at
; -O3 (peephole pass disabled, corei7-avx CPU, AVX enabled) and pipes the
; generated assembly into FileCheck, which applies the directives found below.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; pr18846 - needless avx spill/reload
; Test for unnecessary repeated spills due to eliminateRedundantSpills failing
; to recognise unaligned ymm load/stores to the stack.
; Bugpoint reduced testcase.

; Expectation: once the function label has been matched in the output, no
; "vmovups ... 32-byte Folded Spill" and no "vmovups ... 32-byte Folded Reload"
; line may follow.

;CHECK-LABEL: _Z16opt_kernel_cachePfS_S_
;CHECK-NOT: vmovups {{.*#+}} 32-byte Folded Spill
;CHECK-NOT: vmovups {{.*#+}} 32-byte Folded Reload

; Function under test. It takes no arguments and returns void; the body ends
; in `unreachable`. The instruction order and operands are kept exactly as in
; the reduced testcase, so do not tidy, reorder or simplify them: the unnamed
; values %0..%12 also depend on this order.
; Function Attrs: uwtable
define void @_Z16opt_kernel_cachePfS_S_() #0 {
entry:
  br label %for.body29

; Loops back to itself or falls through to for.body65 on an undef condition.
for.body29:                                       ; preds = %for.body29, %entry
  br i1 undef, label %for.body29, label %for.body65

for.body65:                                       ; preds = %for.body29
  ; Three scalar float loads, each inserted into lane 7 of an otherwise-undef
  ; <8 x float>.
  %0 = load float, ptr undef, align 4, !tbaa !1
  %vecinit7.i4448 = insertelement <8 x float> undef, float %0, i32 7
  %1 = load float, ptr null, align 4, !tbaa !1
  %vecinit7.i4304 = insertelement <8 x float> undef, float %1, i32 7
  %2 = load float, ptr undef, align 4, !tbaa !1
  %vecinit7.i4196 = insertelement <8 x float> undef, float %2, i32 7

  ; 256-bit vector loads (%4..%9); %7 and %9 are loaded with align 4, the
  ; others with align 16.
  %3 = or i64 0, 16
  %add.ptr111.sum4096 = add i64 %3, 0
  %4 = load <8 x float>, ptr null, align 16, !tbaa !5
  %add.ptr162 = getelementptr inbounds [65536 x float], ptr null, i64 0, i64 %add.ptr111.sum4096
  %5 = load <8 x float>, ptr %add.ptr162, align 16, !tbaa !5
  %add.ptr158.sum40975066 = or i64 %add.ptr111.sum4096, 8
  %add.ptr183 = getelementptr inbounds [65536 x float], ptr null, i64 0, i64 %add.ptr158.sum40975066
  %6 = load <8 x float>, ptr %add.ptr183, align 16, !tbaa !5
  %add.ptr200.sum40995067 = or i64 undef, 8
  %add.ptr225 = getelementptr inbounds [65536 x float], ptr null, i64 0, i64 %add.ptr200.sum40995067
  %7 = load <8 x float>, ptr %add.ptr225, align 4, !tbaa !5
  %8 = load <8 x float>, ptr undef, align 16, !tbaa !5
  %add.ptr242.sum41015068 = or i64 0, 8
  %add.ptr267 = getelementptr inbounds [65536 x float], ptr null, i64 0, i64 %add.ptr242.sum41015068
  %9 = load <8 x float>, ptr %add.ptr267, align 4, !tbaa !5

  ; From here on: groups of fmul/fadd on the loaded vectors, each group
  ; followed by stores through the llvm.x86.avx.storeu.ps.256 intrinsic.
  %mul.i4690 = fmul <8 x float> %7, undef
  %add.i4665 = fadd <8 x float> undef, undef
  %mul.i4616 = fmul <8 x float> %8, undef
  %mul.i4598 = fmul <8 x float> undef, undef
  %add.i4597 = fadd <8 x float> undef, %mul.i4598
  %mul.i4594 = fmul <8 x float> %6, undef
  %add.i4593 = fadd <8 x float> undef, %mul.i4594
  %mul.i4578 = fmul <8 x float> %9, undef
  %add.i4577 = fadd <8 x float> %add.i4593, %mul.i4578
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4577) #1

  %10 = load <8 x float>, ptr null, align 16, !tbaa !5
  %11 = load <8 x float>, ptr undef, align 16, !tbaa !5
  %mul.i4564 = fmul <8 x float> %4, undef
  %add.i4563 = fadd <8 x float> %10, %mul.i4564
  %mul.i4560 = fmul <8 x float> %5, undef
  %add.i4559 = fadd <8 x float> %11, %mul.i4560
  %add.i4547 = fadd <8 x float> %add.i4563, undef
  %mul.i4546 = fmul <8 x float> %7, undef
  %add.i4545 = fadd <8 x float> undef, %mul.i4546
  %mul.i4544 = fmul <8 x float> %8, undef
  %add.i4543 = fadd <8 x float> %add.i4559, %mul.i4544
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4547) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4545) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4543) #1

  %add.i4455 = fadd <8 x float> undef, undef
  %mul.i4454 = fmul <8 x float> undef, undef
  %add.i4453 = fadd <8 x float> undef, %mul.i4454
  %mul.i4440 = fmul <8 x float> zeroinitializer, %vecinit7.i4448
  %add.i4439 = fadd <8 x float> %add.i4455, %mul.i4440
  %mul.i4438 = fmul <8 x float> %7, %vecinit7.i4448
  %add.i4437 = fadd <8 x float> %add.i4453, %mul.i4438
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4439) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4437) #1

  %add.i4413 = fadd <8 x float> zeroinitializer, undef
  %mul.i4400 = fmul <8 x float> %8, undef
  %add.i4399 = fadd <8 x float> undef, %mul.i4400
  %add.i4397 = fadd <8 x float> %add.i4413, zeroinitializer
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> zeroinitializer) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4399) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4397) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> undef) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> undef) #1

  %mul.i4330 = fmul <8 x float> %7, undef
  %add.i4329 = fadd <8 x float> undef, %mul.i4330
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4329) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> undef) #1

  %mul.i4312 = fmul <8 x float> %4, undef
  %add.i4311 = fadd <8 x float> undef, %mul.i4312
  %mul.i4306 = fmul <8 x float> %6, undef
  %add.i4305 = fadd <8 x float> undef, %mul.i4306
  %add.i4295 = fadd <8 x float> %add.i4311, undef
  %mul.i4294 = fmul <8 x float> %7, %vecinit7.i4304
  %add.i4293 = fadd <8 x float> undef, %mul.i4294
  %mul.i4292 = fmul <8 x float> %8, %vecinit7.i4304
  %add.i4291 = fadd <8 x float> undef, %mul.i4292
  %mul.i4290 = fmul <8 x float> %9, %vecinit7.i4304
  %add.i4289 = fadd <8 x float> %add.i4305, %mul.i4290
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4295) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4293) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4291) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4289) #1

  %12 = load <8 x float>, ptr undef, align 16, !tbaa !5
  %mul.i4274 = fmul <8 x float> undef, undef
  %add.i4273 = fadd <8 x float> %12, %mul.i4274
  %mul.i4258 = fmul <8 x float> %7, undef
  %add.i4257 = fadd <8 x float> %add.i4273, %mul.i4258
  %mul.i4254 = fmul <8 x float> %9, undef
  %add.i4253 = fadd <8 x float> undef, %mul.i4254
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4257) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i4253) #1

  %mul.i = fmul <8 x float> %9, %vecinit7.i4196
  %add.i = fadd <8 x float> undef, %mul.i
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> zeroinitializer) #1
  call void @llvm.x86.avx.storeu.ps.256(ptr undef, <8 x float> %add.i) #1
  unreachable
}

; Function Attrs: nounwind
declare void @llvm.x86.avx.storeu.ps.256(ptr, <8 x float>) #1

attributes #0 = { uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }

!llvm.ident = !{!0}

; TBAA metadata referenced by the loads above (!1 for the scalar float loads,
; !5 for the <8 x float> loads).
!0 = !{!"clang version 3.5 "}
!1 = !{!2, !2, i64 0}
!2 = !{!"float", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!3, !3, i64 0}