; PR26374: Check no stack slots are allocated for vregs which have no real reference.
; RUN: llc < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Aggregate layouts referenced by the getelementptr instructions in @fn.
%struct.ImageParameters = type { i32, i32, [0 x [16 x i16]] }
%struct.InputParameters = type { i32, i32 }

; Globals read and written by @fn. @c, @a and @b hold pointers that are
; reloaded inside the loops; @d and @e are the buffers handed to
; @distortion4x4.
@c = common global ptr null, align 8
@a = common global ptr null, align 8
@d = common global [6 x i32] zeroinitializer, align 16
@b = common global ptr null, align 8
@e = common global [4 x i32] zeroinitializer, align 16

; It is hard to verify directly that the stack area allocated for spills
; contains no unused holes, so simply check that the allocated stack size
; does not exceed 350.
; (408 is used before the fix for PR26374. 344 is used after the fix).
; The pattern below accepts any immediate from 300 through 349.
;
; CHECK-LABEL: @fn
; CHECK: subq {{\$3[0-4][0-9]}}, %rsp

; @fn is a three-level loop nest that keeps many values live across calls,
; which is what forces the register allocator to spill:
;   * for.body   - outer loop, %q.0131 runs 0..3 (exits when %inc74 == 4).
;   * for.body4  - middle loop over %indvars.iv148, step 4.
;   * for.body9  - inner loop over %indvars.iv145, step 4.
; The i32 result is the running sum of the @distortion4x4 return values.
; NOTE(review): the instruction and block order presumably matters for
; reproducing the original spill pattern - avoid reordering when editing.
; Function Attrs: nounwind uwtable
define i32 @fn() #0 {
entry:
  ; %n is an 8x8 i32 scratch matrix (256 bytes, matching the lifetime markers).
  %n = alloca [8 x [8 x i32]], align 16
  call void @llvm.lifetime.start.p0(i64 256, ptr %n) #3
  ; Pointers to rows 1..7 of %n; only used by the memcpy calls in
  ; for.body61.preheader, so they stay live across the whole loop nest.
  %arraydecay.1 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 1, i64 0
  %arraydecay.2 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 2, i64 0
  %arraydecay.3 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 3, i64 0
  %arraydecay.4 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 4, i64 0
  %arraydecay.5 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 5, i64 0
  %arraydecay.6 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 6, i64 0
  %arraydecay.7 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 7, i64 0
  br label %for.body

for.body:                                         ; preds = %for.inc73, %entry
  ; Outer loop header. %q.0131 is the iteration counter, %m.0130 the
  ; accumulated result carried in from the previous iteration.
  %q.0131 = phi i32 [ 0, %entry ], [ %inc74, %for.inc73 ]
  %m.0130 = phi i32 [ 0, %entry ], [ %m.4, %for.inc73 ]
  ; %shl = (q / 2) * 8 and %shl1 = (q % 2) * 8 are the start values of the
  ; middle and inner loops respectively.
  %div = sdiv i32 %q.0131, 2
  %shl = shl i32 %div, 3
  %rem = srem i32 %q.0131, 2
  %shl1 = shl nsw i32 %rem, 3
  %tmp9 = sext i32 %shl1 to i64
  %tmp10 = sext i32 %shl to i64
  ; %tmp12 / %tmp14 are the start values with bit 2 set; they serve as the
  ; loop bounds for the inner and middle loops.
  %tmp11 = or disjoint i32 %shl1, 4
  %tmp12 = sext i32 %tmp11 to i64
  %tmp13 = or disjoint i32 %shl, 4
  %tmp14 = sext i32 %tmp13 to i64
  br label %for.body4

for.body4:                                        ; preds = %for.inc48, %for.body
  ; Middle loop header: %indvars.iv148 starts at %tmp10 and advances by 4
  ; in for.inc48.
  %indvars.iv148 = phi i64 [ %tmp10, %for.body ], [ %indvars.iv.next149, %for.inc48 ]
  %m.1126 = phi i32 [ %m.0130, %for.body ], [ %m.3.lcssa, %for.inc48 ]
  %tmp15 = load ptr, ptr @c, align 8
  %opix_y = getelementptr inbounds %struct.ImageParameters, ptr %tmp15, i64 0, i32 1
  %tmp16 = load i32, ptr %opix_y, align 4
  %tmp17 = trunc i64 %indvars.iv148 to i32
  %add5 = add nsw i32 %tmp16, %tmp17
  ; %tmp18 is the row offset relative to the loop start (row index into %n).
  %tmp18 = sub nuw nsw i64 %indvars.iv148, %tmp10
  %tmp19 = sext i32 %add5 to i64
  ; Precomputed +1/+2/+3 variants of the three row indices; each one is
  ; consumed by one of the four copies of the body in for.cond14.preheader.
  %tmp20 = add nsw i64 %tmp19, 1
  %tmp21 = or disjoint i64 %indvars.iv148, 1
  %tmp22 = or disjoint i64 %tmp18, 1
  %tmp23 = add nsw i64 %tmp19, 2
  %tmp24 = or disjoint i64 %indvars.iv148, 2
  %tmp25 = or disjoint i64 %tmp18, 2
  %tmp26 = add nsw i64 %tmp19, 3
  %tmp27 = or disjoint i64 %indvars.iv148, 3
  %tmp28 = or disjoint i64 %tmp18, 3
  br label %for.body9

for.body9:                                        ; preds = %for.inc45.for.body9_crit_edge, %for.body4
  ; Inner loop header: %indvars.iv145 starts at %tmp9 and advances by 4 on
  ; the back edge. %tmp29 is the current value of @c (reloaded as %.pre).
  %tmp29 = phi ptr [ %tmp15, %for.body4 ], [ %.pre, %for.inc45.for.body9_crit_edge ]
  %indvars.iv145 = phi i64 [ %tmp9, %for.body4 ], [ %indvars.iv.next146, %for.inc45.for.body9_crit_edge ]
  %m.2124 = phi i32 [ %m.1126, %for.body4 ], [ %m.3, %for.inc45.for.body9_crit_edge ]
  %tmp30 = load i32, ptr %tmp29, align 4
  %tmp31 = trunc i64 %indvars.iv145 to i32
  %add10 = add nsw i32 %tmp30, %tmp31
  ; External call in the innermost loop: every value needed afterwards has
  ; to survive it.
  tail call void @LumaPrediction4x4(i32 %tmp31, i32 %tmp17, i32 0, i32 0, i32 0, i16 signext 0, i16 signext 0) #3
  %tmp32 = load ptr, ptr @a, align 8
  %tmp33 = load ptr, ptr @c, align 8
  ; %tmp34 is the column offset relative to the loop start (column into %n).
  %tmp34 = sub nuw nsw i64 %indvars.iv145, %tmp9
  %tmp35 = sext i32 %add10 to i64
  br label %for.cond14.preheader

for.cond14.preheader:                             ; preds = %for.body9
  ; Four copies of the same row computation (no suffix, .1, .2, .3). Each
  ; copy loads a row pointer from the table at %tmp32, loads <4 x i16> from
  ; that row and from field 2 of the struct at %tmp33, zero-extends both,
  ; subtracts them, and stores the <4 x i32> difference to %n and to @d.
  ; Row 0.
  %arrayidx = getelementptr inbounds ptr, ptr %tmp32, i64 %tmp19
  %tmp36 = load ptr, ptr %arrayidx, align 8
  %arrayidx20 = getelementptr inbounds i16, ptr %tmp36, i64 %tmp35
  %arrayidx26 = getelementptr inbounds %struct.ImageParameters, ptr %tmp33, i64 0, i32 2, i64 %indvars.iv148, i64 %indvars.iv145
  %arrayidx35 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 %tmp18, i64 %tmp34
  %tmp38 = load <4 x i16>, ptr %arrayidx20, align 2
  %tmp39 = zext <4 x i16> %tmp38 to <4 x i32>
  %tmp41 = load <4 x i16>, ptr %arrayidx26, align 2
  %tmp42 = zext <4 x i16> %tmp41 to <4 x i32>
  %tmp43 = sub nsw <4 x i32> %tmp39, %tmp42
  store <4 x i32> %tmp43, ptr %arrayidx35, align 16
  store <4 x i32> %tmp43, ptr @d, align 16
  ; Row 1.
  %arrayidx.1 = getelementptr inbounds ptr, ptr %tmp32, i64 %tmp20
  %tmp45 = load ptr, ptr %arrayidx.1, align 8
  %arrayidx20.1 = getelementptr inbounds i16, ptr %tmp45, i64 %tmp35
  %arrayidx26.1 = getelementptr inbounds %struct.ImageParameters, ptr %tmp33, i64 0, i32 2, i64 %tmp21, i64 %indvars.iv145
  %arrayidx35.1 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 %tmp22, i64 %tmp34
  %tmp47 = load <4 x i16>, ptr %arrayidx20.1, align 2
  %tmp48 = zext <4 x i16> %tmp47 to <4 x i32>
  %tmp50 = load <4 x i16>, ptr %arrayidx26.1, align 2
  %tmp51 = zext <4 x i16> %tmp50 to <4 x i32>
  %tmp52 = sub nsw <4 x i32> %tmp48, %tmp51
  store <4 x i32> %tmp52, ptr %arrayidx35.1, align 16
  ; NOTE(review): this and the two later stores to @d address elements past
  ; the declared [6 x i32] extent - presumably an artifact of test
  ; reduction; left untouched since the test only inspects the frame size.
  store <4 x i32> %tmp52, ptr getelementptr inbounds ([6 x i32], ptr @d, i64 0, i64 4), align 16
  ; Row 2.
  %arrayidx.2 = getelementptr inbounds ptr, ptr %tmp32, i64 %tmp23
  %tmp54 = load ptr, ptr %arrayidx.2, align 8
  %arrayidx20.2 = getelementptr inbounds i16, ptr %tmp54, i64 %tmp35
  %arrayidx26.2 = getelementptr inbounds %struct.ImageParameters, ptr %tmp33, i64 0, i32 2, i64 %tmp24, i64 %indvars.iv145
  %arrayidx35.2 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 %tmp25, i64 %tmp34
  %tmp56 = load <4 x i16>, ptr %arrayidx20.2, align 2
  %tmp57 = zext <4 x i16> %tmp56 to <4 x i32>
  %tmp59 = load <4 x i16>, ptr %arrayidx26.2, align 2
  %tmp60 = zext <4 x i16> %tmp59 to <4 x i32>
  %tmp61 = sub nsw <4 x i32> %tmp57, %tmp60
  store <4 x i32> %tmp61, ptr %arrayidx35.2, align 16
  store <4 x i32> %tmp61, ptr getelementptr ([6 x i32], ptr @d, i64 1, i64 2), align 16
  ; Row 3.
  %arrayidx.3 = getelementptr inbounds ptr, ptr %tmp32, i64 %tmp26
  %tmp63 = load ptr, ptr %arrayidx.3, align 8
  %arrayidx20.3 = getelementptr inbounds i16, ptr %tmp63, i64 %tmp35
  %arrayidx26.3 = getelementptr inbounds %struct.ImageParameters, ptr %tmp33, i64 0, i32 2, i64 %tmp27, i64 %indvars.iv145
  %arrayidx35.3 = getelementptr inbounds [8 x [8 x i32]], ptr %n, i64 0, i64 %tmp28, i64 %tmp34
  %tmp65 = load <4 x i16>, ptr %arrayidx20.3, align 2
  %tmp66 = zext <4 x i16> %tmp65 to <4 x i32>
  %tmp68 = load <4 x i16>, ptr %arrayidx26.3, align 2
  %tmp69 = zext <4 x i16> %tmp68 to <4 x i32>
  %tmp70 = sub nsw <4 x i32> %tmp66, %tmp69
  store <4 x i32> %tmp70, ptr %arrayidx35.3, align 16
  store <4 x i32> %tmp70, ptr getelementptr ([6 x i32], ptr @d, i64 2, i64 0), align 16
  ; Branch on the first i32 of the struct pointed to by @b: zero goes on to
  ; test the second field, non-zero goes straight to if.then.
  %tmp72 = load ptr, ptr @b, align 8
  %tmp73 = load i32, ptr %tmp72, align 4
  %cmp42 = icmp eq i32 %tmp73, 0
  br i1 %cmp42, label %land.lhs.true, label %if.then

land.lhs.true:                                    ; preds = %for.cond14.preheader
  ; Second field (index 1) of the struct at @b: zero -> if.then, non-zero
  ; skips the distortion call for this inner iteration.
  %Transform8x8Mode = getelementptr inbounds %struct.InputParameters, ptr %tmp72, i64 0, i32 1
  %tmp74 = load i32, ptr %Transform8x8Mode, align 4
  %tobool = icmp eq i32 %tmp74, 0
  br i1 %tobool, label %if.then, label %for.inc45

if.then:                                          ; preds = %land.lhs.true, %for.cond14.preheader
  ; Accumulate the distortion of the block just written to @d.
  %call = tail call i32 @distortion4x4(ptr nonnull @d) #3
  %add44 = add nsw i32 %call, %m.2124
  br label %for.inc45

for.inc45:                                        ; preds = %if.then, %land.lhs.true
  ; Inner loop latch: continue while the pre-increment index is below %tmp12.
  %m.3 = phi i32 [ %m.2124, %land.lhs.true ], [ %add44, %if.then ]
  %cmp8 = icmp slt i64 %indvars.iv145, %tmp12
  br i1 %cmp8, label %for.inc45.for.body9_crit_edge, label %for.inc48

for.inc45.for.body9_crit_edge:                    ; preds = %for.inc45
  ; Inner loop back edge: bump the index by 4 and reload @c for the header.
  %indvars.iv.next146 = add nsw i64 %indvars.iv145, 4
  %.pre = load ptr, ptr @c, align 8
  br label %for.body9

for.inc48:                                        ; preds = %for.inc45
  ; Middle loop latch: continue while the pre-increment index is below %tmp14.
  %m.3.lcssa = phi i32 [ %m.3, %for.inc45 ]
  %indvars.iv.next149 = add nsw i64 %indvars.iv148, 4
  %cmp3 = icmp slt i64 %indvars.iv148, %tmp14
  br i1 %cmp3, label %for.body4, label %for.end50

for.end50:                                        ; preds = %for.inc48
  ; After the middle loop: re-test the two fields of the struct at @b. The
  ; memcpy/distortion path below runs only when the first field is zero and
  ; the second is non-zero.
  %m.3.lcssa.lcssa = phi i32 [ %m.3.lcssa, %for.inc48 ]
  %tmp75 = load ptr, ptr @b, align 8
  %tmp76 = load i32, ptr %tmp75, align 4
  %cmp52 = icmp eq i32 %tmp76, 0
  br i1 %cmp52, label %land.lhs.true54, label %for.inc73

land.lhs.true54:                                  ; preds = %for.end50
  %Transform8x8Mode55 = getelementptr inbounds %struct.InputParameters, ptr %tmp75, i64 0, i32 1
  %tmp77 = load i32, ptr %Transform8x8Mode55, align 4
  %tobool56 = icmp eq i32 %tmp77, 0
  br i1 %tobool56, label %for.inc73, label %for.body61.preheader

for.body61.preheader:                             ; preds = %land.lhs.true54
  ; Copy the eight 32-byte rows of %n to consecutive 32-byte slots addressed
  ; relative to @e, then accumulate the distortion of @e.
  ; NOTE(review): the destination addresses lie past the declared
  ; [4 x i32] extent of @e - presumably a test-reduction artifact; left
  ; untouched since the test only inspects the frame size.
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 4, i64 0), ptr align 16 %n, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 6, i64 0), ptr align 16 %arraydecay.1, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 8, i64 0), ptr align 16 %arraydecay.2, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 10, i64 0), ptr align 16 %arraydecay.3, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 12, i64 0), ptr align 16 %arraydecay.4, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 14, i64 0), ptr align 16 %arraydecay.5, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 16, i64 0), ptr align 16 %arraydecay.6, i64 32, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 nonnull getelementptr ([4 x i32], ptr @e, i64 18, i64 0), ptr align 16 %arraydecay.7, i64 32, i1 false)
  %call70 = tail call i32 @distortion4x4(ptr nonnull @e) #3
  %add71 = add nsw i32 %call70, %m.3.lcssa.lcssa
  br label %for.inc73

for.inc73:                                        ; preds = %for.body61.preheader, %land.lhs.true54, %for.end50
  ; Outer loop latch: merge the accumulator and stop after four iterations.
  %m.4 = phi i32 [ %add71, %for.body61.preheader ], [ %m.3.lcssa.lcssa, %land.lhs.true54 ], [ %m.3.lcssa.lcssa, %for.end50 ]
  %inc74 = add nuw nsw i32 %q.0131, 1
  %exitcond156 = icmp eq i32 %inc74, 4
  br i1 %exitcond156, label %for.end75, label %for.body

for.end75:                                        ; preds = %for.inc73
  ; Function exit: end the lifetime of %n and return the accumulated sum.
  %m.4.lcssa = phi i32 [ %m.4, %for.inc73 ]
  call void @llvm.lifetime.end.p0(i64 256, ptr %n) #3
  ret i32 %m.4.lcssa
}

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1

; External helpers; only their call sites matter to this test.
declare void @LumaPrediction4x4(i32, i32, i32, i32, i32, i16 signext, i16 signext) #2

declare i32 @distortion4x4(ptr) #2

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1) #1

; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1

; Attribute groups: #0 is used by @fn, #1 by the intrinsic declarations,
; #2 by the external helper declarations, #3 by the call sites inside @fn.
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
