; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s

target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"

define void @vscale_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ult'
; CHECK-NEXT:  Classifying expressions for: @vscale_ult
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ult
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483646
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ule(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ule'
; CHECK-NEXT:  Classifying expressions for: @vscale_ule
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 + vscale)<nsw> umax %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ule
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ule i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_ne'
; CHECK-NEXT:  Classifying expressions for: @vscale_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 * vscale)<nsw> + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 * vscale)<nsw> + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 * vscale)<nsw> + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %vscale
  %cmp = icmp ne i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}


define void @vscalex4_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_slt'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_slt
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_slt
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_ult
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add nsw i32 %i.05, %VF
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}


define void @vscale_slt_with_vp_plain(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_plain'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_plain
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_plain
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF)

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_umin
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %left = sub i32 %n, %i.05
; CHECK-NEXT:    --> {%n,+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT:    --> ((4 * vscale)<nuw><nsw> umin {%n,+,(-4 * vscale)<nsw>}<nw><%for.body>) U: [0,4097) S: [0,4097) Exits: (((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) umin (4 * vscale)<nuw><nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_umin
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 268435454
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)

  %add = add nsw i32 %i.05, %VF
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_with_vp_umin2'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_with_vp_umin2
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> %i.05 U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> ((4 * %i.05) + %A) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %left = sub i32 %n, %i.05
; CHECK-NEXT:    --> ((-1 * %i.05)<nsw> + %n) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
; CHECK-NEXT:    --> (((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) U: [0,4097) S: [0,4097) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:    %add = add nsw i32 %i.05, %VF.capped
; CHECK-NEXT:    --> ((((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) + %i.05)<nuw><nsw> U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_with_vp_umin2
; CHECK-NEXT:  Loop %for.body: Unpredictable backedge-taken count.
; CHECK-NEXT:  Loop %for.body: Unpredictable constant max backedge-taken count.
; CHECK-NEXT:  Loop %for.body: Unpredictable symbolic max backedge-taken count.
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05

  %left = sub i32 %n, %i.05
  %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left)

  %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped)

  %add = add nsw i32 %i.05, %VF.capped
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; The next two cases check to see if we can infer the flags on the IV
; of a countup loop using vscale strides. vscale is a power of two
; and these are finite loops by assumption.

define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_slt_noflags'
; CHECK-NEXT:  Classifying expressions for: @vscale_slt_noflags
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add i32 %i.05, %vscale
; CHECK-NEXT:    --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_slt_noflags
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 1073741822
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add i32 %i.05, %vscale
  %cmp = icmp slt i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_ult_noflags'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_ult_noflags
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = mul i32 %vscale, 4
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
; CHECK-NEXT:    --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %add = add i32 %i.05, %VF
; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_ult_noflags
; CHECK-NEXT:  Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870910
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = mul i32 %vscale, 4
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
  %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %add = add i32 %i.05, %VF
  %cmp = icmp ult i32 %add, %n
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; The next two cases check to see if we can infer the flags on the IV
; of a countdown loop using vscale strides.

define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscale_countdown_ne'
; CHECK-NEXT:  Classifying expressions for: @vscale_countdown_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %start = sub i32 %n, %vscale
; CHECK-NEXT:    --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT:    --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT:    --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %sub = sub i32 %iv, %vscale
; CHECK-NEXT:    --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscale_countdown_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 2147483647
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %cmp4 = icmp sgt i32 %n, 0
  %start = sub i32 %n, %vscale
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %sub = sub i32 %iv, %vscale
  %cmp = icmp ne i32 %sub, 0
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}

define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
; CHECK-LABEL: 'vscalex4_countdown_ne'
; CHECK-NEXT:  Classifying expressions for: @vscalex4_countdown_ne
; CHECK-NEXT:    %vscale = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    --> vscale U: [2,1025) S: [2,1025)
; CHECK-NEXT:    %VF = shl i32 %vscale, 2
; CHECK-NEXT:    --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097)
; CHECK-NEXT:    %start = sub i32 %n, %VF
; CHECK-NEXT:    --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set
; CHECK-NEXT:    %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT:    --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
; CHECK-NEXT:    --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:    %sub = sub i32 %iv, %VF
; CHECK-NEXT:    --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT:  Determining loop execution counts for: @vscalex4_countdown_ne
; CHECK-NEXT:  Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is i32 536870911
; CHECK-NEXT:  Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)
; CHECK-NEXT:  Loop %for.body: Trip multiple is 1
;
entry:
  %vscale = call i32 @llvm.vscale.i32()
  %VF = shl i32 %vscale, 2
  %cmp4 = icmp sgt i32 %n, 0
  %start = sub i32 %n, %VF
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
  %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4
  %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1)
  store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4
  %sub = sub i32 %iv, %VF
  %cmp = icmp ne i32 %sub, 0
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body, %entry
  ret void
}