1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 11; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 12; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 13; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 14; CHECK: vector.ph: 15; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 16; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 17; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 18; CHECK: vector.body: 19; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 20; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 21; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 22; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]] 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 24; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>* 25; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4 26; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 27; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 28; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 29; CHECK: middle.block: 30; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 31; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 32; CHECK: scalar.ph: 33; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY:%.*]] ] 34; CHECK-NEXT: br label [[FOR_COND:%.*]] 35; CHECK: for.cond: 36; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 37; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 38; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 39; CHECK-NEXT: store i16 0, i16* [[B]], align 4 40; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 41; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 42; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 43; CHECK: if.end: 44; CHECK-NEXT: ret void 45; 46; TAILFOLD-LABEL: @bottom_tested( 47; TAILFOLD-NEXT: entry: 48; TAILFOLD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 49; TAILFOLD-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 50; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 51; TAILFOLD: vector.ph: 52; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1 53; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 54; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 55; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1 56; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 57; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 58; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 59; TAILFOLD: vector.body: 60; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 61; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 62; TAILFOLD-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 63; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 64; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 65; TAILFOLD-NEXT: [[TMP4:%.*]] = 
sext <2 x i32> [[VEC_IND]] to <2 x i64> 66; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 67; TAILFOLD-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 68; TAILFOLD: pred.store.if: 69; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 70; TAILFOLD-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] 71; TAILFOLD-NEXT: store i16 0, i16* [[TMP7]], align 4 72; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 73; TAILFOLD: pred.store.continue: 74; TAILFOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 75; TAILFOLD-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 76; TAILFOLD: pred.store.if1: 77; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 78; TAILFOLD-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]] 79; TAILFOLD-NEXT: store i16 0, i16* [[TMP10]], align 4 80; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 81; TAILFOLD: pred.store.continue2: 82; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 83; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 84; TAILFOLD-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 85; TAILFOLD-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 86; TAILFOLD: middle.block: 87; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 88; TAILFOLD: scalar.ph: 89; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 90; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 91; TAILFOLD: for.cond: 92; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 93; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 94; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 95; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 96; 
TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 97; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 98; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]] 99; TAILFOLD: if.end: 100; TAILFOLD-NEXT: ret void 101; 102entry: 103 br label %for.cond 104 105for.cond: 106 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 107 %iprom = sext i32 %i to i64 108 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 109 store i16 0, i16* %b, align 4 110 %inc = add nsw i32 %i, 1 111 %cmp = icmp slt i32 %i, %n 112 br i1 %cmp, label %for.cond, label %if.end 113 114if.end: 115 ret void 116} 117 118define void @early_exit(i16* %p, i32 %n) { 119; CHECK-LABEL: @early_exit( 120; CHECK-NEXT: entry: 121; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 122; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 123; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 124; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 125; CHECK: vector.ph: 126; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 127; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 128; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 129; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 130; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 131; CHECK: vector.body: 132; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 133; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 134; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 135; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 136; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 137; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 138; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 139; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x 
i16>* 140; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 141; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 142; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 143; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 144; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 145; CHECK: middle.block: 146; CHECK-NEXT: br label [[SCALAR_PH]] 147; CHECK: scalar.ph: 148; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 149; CHECK-NEXT: br label [[FOR_COND:%.*]] 150; CHECK: for.cond: 151; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 152; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 153; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 154; CHECK: for.body: 155; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 156; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 157; CHECK-NEXT: store i16 0, i16* [[B]], align 4 158; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 159; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] 160; CHECK: if.end: 161; CHECK-NEXT: ret void 162; 163; TAILFOLD-LABEL: @early_exit( 164; TAILFOLD-NEXT: entry: 165; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 166; TAILFOLD: for.cond: 167; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 168; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 169; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 170; TAILFOLD: for.body: 171; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 172; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 173; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 174; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 175; TAILFOLD-NEXT: br label [[FOR_COND]] 176; TAILFOLD: if.end: 177; 
TAILFOLD-NEXT: ret void 178; 179entry: 180 br label %for.cond 181 182for.cond: 183 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 184 %cmp = icmp slt i32 %i, %n 185 br i1 %cmp, label %for.body, label %if.end 186 187for.body: 188 %iprom = sext i32 %i to i64 189 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 190 store i16 0, i16* %b, align 4 191 %inc = add nsw i32 %i, 1 192 br label %for.cond 193 194if.end: 195 ret void 196} 197 198; Same as early_exit, but with optsize to prevent the use of 199; a scalar epilogue. Neither RUN configuration vectorizes this loop. 200define void @optsize(i16* %p, i32 %n) optsize { 201; CHECK-LABEL: @optsize( 202; CHECK-NEXT: entry: 203; CHECK-NEXT: br label [[FOR_COND:%.*]] 204; CHECK: for.cond: 205; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 206; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 207; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 208; CHECK: for.body: 209; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 210; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 211; CHECK-NEXT: store i16 0, i16* [[B]], align 4 212; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 213; CHECK-NEXT: br label [[FOR_COND]] 214; CHECK: if.end: 215; CHECK-NEXT: ret void 216; 217; TAILFOLD-LABEL: @optsize( 218; TAILFOLD-NEXT: entry: 219; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 220; TAILFOLD: for.cond: 221; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 222; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 223; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 224; TAILFOLD: for.body: 225; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 226; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 227; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 228; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 229; TAILFOLD-NEXT: br label [[FOR_COND]] 
230; TAILFOLD: if.end: 231; TAILFOLD-NEXT: ret void 232; 233entry: 234 br label %for.cond 235 236for.cond: 237 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 238 %cmp = icmp slt i32 %i, %n 239 br i1 %cmp, label %for.body, label %if.end 240 241for.body: 242 %iprom = sext i32 %i to i64 243 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 244 store i16 0, i16* %b, align 4 245 %inc = add nsw i32 %i, 1 246 br label %for.cond 247 248if.end: 249 ret void 250} 251 252 253; Multiple exits to a unique exit block; no value defined inside the loop is used outside it. 254define void @multiple_unique_exit(i16* %p, i32 %n) { 255; CHECK-LABEL: @multiple_unique_exit( 256; CHECK-NEXT: entry: 257; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 258; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 259; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 260; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 261; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 262; CHECK: vector.ph: 263; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 264; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 265; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 266; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 267; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 268; CHECK: vector.body: 269; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 270; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 271; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 272; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 273; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 274; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 275; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 276; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 
x i16>* 277; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 278; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 279; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 280; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 281; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 282; CHECK: middle.block: 283; CHECK-NEXT: br label [[SCALAR_PH]] 284; CHECK: scalar.ph: 285; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 286; CHECK-NEXT: br label [[FOR_COND:%.*]] 287; CHECK: for.cond: 288; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 289; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 290; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 291; CHECK: for.body: 292; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 293; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 294; CHECK-NEXT: store i16 0, i16* [[B]], align 4 295; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 296; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 297; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]] 298; CHECK: if.end: 299; CHECK-NEXT: ret void 300; 301; TAILFOLD-LABEL: @multiple_unique_exit( 302; TAILFOLD-NEXT: entry: 303; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 304; TAILFOLD: for.cond: 305; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 306; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 307; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 308; TAILFOLD: for.body: 309; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 310; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 311; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 312; TAILFOLD-NEXT: [[INC]] = 
add nsw i32 [[I]], 1 313; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 314; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 315; TAILFOLD: if.end: 316; TAILFOLD-NEXT: ret void 317; 318entry: 319 br label %for.cond 320 321for.cond: 322 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 323 %cmp = icmp slt i32 %i, %n 324 br i1 %cmp, label %for.body, label %if.end 325 326for.body: 327 %iprom = sext i32 %i to i64 328 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 329 store i16 0, i16* %b, align 4 330 %inc = add nsw i32 %i, 1 331 %cmp2 = icmp slt i32 %i, 2096 332 br i1 %cmp2, label %for.cond, label %if.end 333 334if.end: 335 ret void 336} 337 338; Multiple exits to a unique exit block; %i is used after the loop, so the exit block gets an LCSSA phi. 339define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 340; CHECK-LABEL: @multiple_unique_exit2( 341; CHECK-NEXT: entry: 342; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 343; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 344; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 345; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 346; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 347; CHECK: vector.ph: 348; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 349; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 350; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 351; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 352; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 353; CHECK: vector.body: 354; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 355; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 356; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 357; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 358; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 359; CHECK-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 360; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 361; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 362; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 363; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 364; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 365; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 366; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 367; CHECK: middle.block: 368; CHECK-NEXT: br label [[SCALAR_PH]] 369; CHECK: scalar.ph: 370; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 371; CHECK-NEXT: br label [[FOR_COND:%.*]] 372; CHECK: for.cond: 373; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 374; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 375; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 376; CHECK: for.body: 377; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 378; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 379; CHECK-NEXT: store i16 0, i16* [[B]], align 4 380; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 381; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 382; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]] 383; CHECK: if.end: 384; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 385; CHECK-NEXT: ret i32 [[I_LCSSA]] 386; 387; TAILFOLD-LABEL: @multiple_unique_exit2( 388; TAILFOLD-NEXT: entry: 389; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 390; TAILFOLD: for.cond: 391; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 392; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 393; TAILFOLD-NEXT: br i1 
[[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 394; TAILFOLD: for.body: 395; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 396; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 397; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 398; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 399; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 400; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 401; TAILFOLD: if.end: 402; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 403; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 404; 405entry: 406 br label %for.cond 407 408for.cond: 409 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 410 %cmp = icmp slt i32 %i, %n 411 br i1 %cmp, label %for.body, label %if.end 412 413for.body: 414 %iprom = sext i32 %i to i64 415 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 416 store i16 0, i16* %b, align 4 417 %inc = add nsw i32 %i, 1 418 %cmp2 = icmp slt i32 %i, 2096 419 br i1 %cmp2, label %for.cond, label %if.end 420 421if.end: 422 ret i32 %i 423} 424 425; Multiple exits to a unique exit block that has a non-LCSSA phi (constant incoming value per exiting block). 426define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 427; CHECK-LABEL: @multiple_unique_exit3( 428; CHECK-NEXT: entry: 429; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 430; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 431; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 432; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 433; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 434; CHECK: vector.ph: 435; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 436; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 437; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 438; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 439; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 440; CHECK: vector.body: 441; CHECK-NEXT: 
[[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 442; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 443; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 444; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 445; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 446; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 447; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 448; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 449; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 450; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 451; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 452; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 453; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 454; CHECK: middle.block: 455; CHECK-NEXT: br label [[SCALAR_PH]] 456; CHECK: scalar.ph: 457; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 458; CHECK-NEXT: br label [[FOR_COND:%.*]] 459; CHECK: for.cond: 460; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 461; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 462; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 463; CHECK: for.body: 464; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 465; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 466; CHECK-NEXT: store i16 0, i16* [[B]], align 4 467; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 468; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 469; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]] 470; CHECK: if.end: 471; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] 
], [ 1, [[FOR_BODY]] ] 472; CHECK-NEXT: ret i32 [[EXIT]] 473; 474; TAILFOLD-LABEL: @multiple_unique_exit3( 475; TAILFOLD-NEXT: entry: 476; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 477; TAILFOLD: for.cond: 478; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 479; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 480; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 481; TAILFOLD: for.body: 482; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 483; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 484; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 485; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 486; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 487; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 488; TAILFOLD: if.end: 489; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 490; TAILFOLD-NEXT: ret i32 [[EXIT]] 491; 492entry: 493 br label %for.cond 494 495for.cond: 496 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 497 %cmp = icmp slt i32 %i, %n 498 br i1 %cmp, label %for.body, label %if.end 499 500for.body: 501 %iprom = sext i32 %i to i64 502 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 503 store i16 0, i16* %b, align 4 504 %inc = add nsw i32 %i, 1 505 %cmp2 = icmp slt i32 %i, 2096 506 br i1 %cmp2, label %for.cond, label %if.end 507 508if.end: 509 %exit = phi i32 [0, %for.cond], [1, %for.body] 510 ret i32 %exit 511} 512 513; Multiple exits with distinct target blocks; left scalar under both RUN configurations. 514define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 515; CHECK-LABEL: @multiple_exit_blocks( 516; CHECK-NEXT: entry: 517; CHECK-NEXT: br label [[FOR_COND:%.*]] 518; CHECK: for.cond: 519; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 520; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 521; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 522; CHECK: 
for.body: 523; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 524; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 525; CHECK-NEXT: store i16 0, i16* [[B]], align 4 526; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 527; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 528; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 529; CHECK: if.end: 530; CHECK-NEXT: ret i32 0 531; CHECK: if.end2: 532; CHECK-NEXT: ret i32 1 533; 534; TAILFOLD-LABEL: @multiple_exit_blocks( 535; TAILFOLD-NEXT: entry: 536; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 537; TAILFOLD: for.cond: 538; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 539; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 540; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 541; TAILFOLD: for.body: 542; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 543; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 544; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 545; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 546; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 547; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 548; TAILFOLD: if.end: 549; TAILFOLD-NEXT: ret i32 0 550; TAILFOLD: if.end2: 551; TAILFOLD-NEXT: ret i32 1 552; 553entry: 554 br label %for.cond 555 556for.cond: 557 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 558 %cmp = icmp slt i32 %i, %n 559 br i1 %cmp, label %for.body, label %if.end 560 561for.body: 562 %iprom = sext i32 %i to i64 563 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 564 store i16 0, i16* %b, align 4 565 %inc = add nsw i32 %i, 1 566 %cmp2 = icmp slt i32 %i, 2096 567 br i1 %cmp2, label %for.cond, label %if.end2 568 569if.end: 570 ret i32 0 571 572if.end2: 573 ret i32 1 574} 575 576; Distinct exit blocks, each returning the same loop value (%i) through its own LCSSA phi. 577define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { 578; CHECK-LABEL: 
@multiple_exit_blocks2( 579; CHECK-NEXT: entry: 580; CHECK-NEXT: br label [[FOR_COND:%.*]] 581; CHECK: for.cond: 582; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 583; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 584; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 585; CHECK: for.body: 586; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 587; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 588; CHECK-NEXT: store i16 0, i16* [[B]], align 4 589; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 590; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 591; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 592; CHECK: if.end: 593; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 594; CHECK-NEXT: ret i32 [[I_LCSSA]] 595; CHECK: if.end2: 596; CHECK-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 597; CHECK-NEXT: ret i32 [[I_LCSSA1]] 598; 599; TAILFOLD-LABEL: @multiple_exit_blocks2( 600; TAILFOLD-NEXT: entry: 601; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 602; TAILFOLD: for.cond: 603; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 604; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 605; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 606; TAILFOLD: for.body: 607; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 608; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 609; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 610; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 611; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 612; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 613; TAILFOLD: if.end: 614; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 615; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 616; TAILFOLD: if.end2: 617; TAILFOLD-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], 
[[FOR_BODY]] ] 618; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]] 619; 620entry: 621 br label %for.cond 622 623for.cond: 624 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 625 %cmp = icmp slt i32 %i, %n 626 br i1 %cmp, label %for.body, label %if.end 627 628for.body: 629 %iprom = sext i32 %i to i64 630 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 631 store i16 0, i16* %b, align 4 632 %inc = add nsw i32 %i, 1 633 %cmp2 = icmp slt i32 %i, 2096 634 br i1 %cmp2, label %for.cond, label %if.end2 635 636if.end: 637 ret i32 %i 638 639if.end2: 640 ret i32 %i 641} 642 643; LCSSA, distinct value each exit 644define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { 645; CHECK-LABEL: @multiple_exit_blocks3( 646; CHECK-NEXT: entry: 647; CHECK-NEXT: br label [[FOR_COND:%.*]] 648; CHECK: for.cond: 649; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 650; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 651; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 652; CHECK: for.body: 653; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 654; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 655; CHECK-NEXT: store i16 0, i16* [[B]], align 4 656; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 657; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 658; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 659; CHECK: if.end: 660; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 661; CHECK-NEXT: ret i32 [[I_LCSSA]] 662; CHECK: if.end2: 663; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 664; CHECK-NEXT: ret i32 [[INC_LCSSA]] 665; 666; TAILFOLD-LABEL: @multiple_exit_blocks3( 667; TAILFOLD-NEXT: entry: 668; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 669; TAILFOLD: for.cond: 670; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 671; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 672; TAILFOLD-NEXT: br i1 
[[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 673; TAILFOLD: for.body: 674; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 675; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 676; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 677; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 678; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 679; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 680; TAILFOLD: if.end: 681; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 682; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 683; TAILFOLD: if.end2: 684; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 685; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 686; 687entry: 688 br label %for.cond 689 690for.cond: 691 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 692 %cmp = icmp slt i32 %i, %n 693 br i1 %cmp, label %for.body, label %if.end 694 695for.body: 696 %iprom = sext i32 %i to i64 697 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 698 store i16 0, i16* %b, align 4 699 %inc = add nsw i32 %i, 1 700 %cmp2 = icmp slt i32 %i, 2096 701 br i1 %cmp2, label %for.cond, label %if.end2 702 703if.end: 704 ret i32 %i 705 706if.end2: 707 ret i32 %inc 708} 709 710; unique exit case but with a switch as two edges between the same pair of 711; blocks is an often missed edge case 712define i32 @multiple_exit_switch(i16* %p, i32 %n) { 713; CHECK-LABEL: @multiple_exit_switch( 714; CHECK-NEXT: entry: 715; CHECK-NEXT: br label [[FOR_COND:%.*]] 716; CHECK: for.cond: 717; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 718; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 719; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 720; CHECK-NEXT: store i16 0, i16* [[B]], align 4 721; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 722; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 723; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 724; 
CHECK-NEXT: i32 2097, label [[IF_END]] 725; CHECK-NEXT: ] 726; CHECK: if.end: 727; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 728; CHECK-NEXT: ret i32 [[I_LCSSA]] 729; 730; TAILFOLD-LABEL: @multiple_exit_switch( 731; TAILFOLD-NEXT: entry: 732; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 733; TAILFOLD: for.cond: 734; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 735; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 736; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 737; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 738; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 739; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 740; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 741; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 742; TAILFOLD-NEXT: ] 743; TAILFOLD: if.end: 744; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 745; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 746; 747entry: 748 br label %for.cond 749 750for.cond: 751 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 752 %iprom = sext i32 %i to i64 753 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 754 store i16 0, i16* %b, align 4 755 %inc = add nsw i32 %i, 1 756 switch i32 %i, label %for.cond [ 757 i32 2096, label %if.end 758 i32 2097, label %if.end 759 ] 760 761if.end: 762 ret i32 %i 763} 764 765; multiple exit case but with a switch as multiple exiting edges from 766; a single block is a commonly missed edge case 767define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 768; CHECK-LABEL: @multiple_exit_switch2( 769; CHECK-NEXT: entry: 770; CHECK-NEXT: br label [[FOR_COND:%.*]] 771; CHECK: for.cond: 772; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 773; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 774; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 775; CHECK-NEXT: store 
i16 0, i16* [[B]], align 4 776; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 777; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 778; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 779; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 780; CHECK-NEXT: ] 781; CHECK: if.end: 782; CHECK-NEXT: ret i32 0 783; CHECK: if.end2: 784; CHECK-NEXT: ret i32 1 785; 786; TAILFOLD-LABEL: @multiple_exit_switch2( 787; TAILFOLD-NEXT: entry: 788; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 789; TAILFOLD: for.cond: 790; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 791; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 792; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 793; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 794; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 795; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 796; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 797; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 798; TAILFOLD-NEXT: ] 799; TAILFOLD: if.end: 800; TAILFOLD-NEXT: ret i32 0 801; TAILFOLD: if.end2: 802; TAILFOLD-NEXT: ret i32 1 803; 804entry: 805 br label %for.cond 806 807for.cond: 808 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 809 %iprom = sext i32 %i to i64 810 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 811 store i16 0, i16* %b, align 4 812 %inc = add nsw i32 %i, 1 813 switch i32 %i, label %for.cond [ 814 i32 2096, label %if.end 815 i32 2097, label %if.end2 816 ] 817 818if.end: 819 ret i32 0 820 821if.end2: 822 ret i32 1 823} 824 825define i32 @multiple_latch1(i16* %p) { 826; CHECK-LABEL: @multiple_latch1( 827; CHECK-NEXT: entry: 828; CHECK-NEXT: br label [[FOR_BODY:%.*]] 829; CHECK: for.body: 830; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 831; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 832; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 833; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label 
[[FOR_SECOND:%.*]] 834; CHECK: for.second: 835; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 836; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 837; CHECK-NEXT: store i16 0, i16* [[B]], align 4 838; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 839; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 840; CHECK: for.body.backedge: 841; CHECK-NEXT: br label [[FOR_BODY]] 842; CHECK: for.end: 843; CHECK-NEXT: ret i32 0 844; 845; TAILFOLD-LABEL: @multiple_latch1( 846; TAILFOLD-NEXT: entry: 847; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 848; TAILFOLD: for.body: 849; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 850; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 851; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 852; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 853; TAILFOLD: for.second: 854; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 855; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 856; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 857; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 858; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 859; TAILFOLD: for.body.backedge: 860; TAILFOLD-NEXT: br label [[FOR_BODY]] 861; TAILFOLD: for.end: 862; TAILFOLD-NEXT: ret i32 0 863; 864entry: 865 br label %for.body 866 867for.body: 868 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 869 %inc = add nsw i32 %i.02, 1 870 %cmp = icmp slt i32 %inc, 16 871 br i1 %cmp, label %for.body.backedge, label %for.second 872 873for.second: 874 %iprom = sext i32 %i.02 to i64 875 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 876 store i16 0, i16* %b, align 4 877 %cmps = icmp sgt i32 %inc, 16 878 br i1 %cmps, label %for.body.backedge, label %for.end 879 880for.body.backedge: 881 br label %for.body 882 
883for.end: 884 ret i32 0 885} 886 887 888; two back branches - loop simplify will convert this to the same form 889; as the previous test before the vectorizer sees it, but show that. 890define i32 @multiple_latch2(i16* %p) { 891; CHECK-LABEL: @multiple_latch2( 892; CHECK-NEXT: entry: 893; CHECK-NEXT: br label [[FOR_BODY:%.*]] 894; CHECK: for.body: 895; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 896; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 897; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 898; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 899; CHECK: for.body.backedge: 900; CHECK-NEXT: br label [[FOR_BODY]] 901; CHECK: for.second: 902; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 903; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 904; CHECK-NEXT: store i16 0, i16* [[B]], align 4 905; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 906; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 907; CHECK: for.end: 908; CHECK-NEXT: ret i32 0 909; 910; TAILFOLD-LABEL: @multiple_latch2( 911; TAILFOLD-NEXT: entry: 912; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 913; TAILFOLD: for.body: 914; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 915; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 916; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 917; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 918; TAILFOLD: for.body.backedge: 919; TAILFOLD-NEXT: br label [[FOR_BODY]] 920; TAILFOLD: for.second: 921; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 922; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 923; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 924; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 925; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], 
label [[FOR_END:%.*]] 926; TAILFOLD: for.end: 927; TAILFOLD-NEXT: ret i32 0 928; 929entry: 930 br label %for.body 931 932for.body: 933 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] 934 %inc = add nsw i32 %i.02, 1 935 %cmp = icmp slt i32 %inc, 16 936 br i1 %cmp, label %for.body, label %for.second 937 938for.second: 939 %iprom = sext i32 %i.02 to i64 940 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 941 store i16 0, i16* %b, align 4 942 %cmps = icmp sgt i32 %inc, 16 943 br i1 %cmps, label %for.body, label %for.end 944 945for.end: 946 ret i32 0 947} 948 949 950; Check interaction between block predication and early exits. We need the 951; condition on the early exit to remain dead (i.e. not be used when forming 952; the predicate mask). 953define void @scalar_predication(float* %addr) { 954; CHECK-LABEL: @scalar_predication( 955; CHECK-NEXT: entry: 956; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 957; CHECK: vector.ph: 958; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 959; CHECK: vector.body: 960; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 961; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 962; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 963; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 964; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 965; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 966; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 967; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 968; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 969; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 970; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label 
[[PRED_STORE_CONTINUE:%.*]] 971; CHECK: pred.store.if: 972; CHECK-NEXT: store float 1.000000e+01, float* [[TMP1]], align 4 973; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 974; CHECK: pred.store.continue: 975; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 976; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 977; CHECK: pred.store.if1: 978; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 979; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]] 980; CHECK-NEXT: store float 1.000000e+01, float* [[TMP9]], align 4 981; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 982; CHECK: pred.store.continue2: 983; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 984; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 985; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 986; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 987; CHECK: middle.block: 988; CHECK-NEXT: br label [[SCALAR_PH]] 989; CHECK: scalar.ph: 990; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 991; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 992; CHECK: loop.header: 993; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 994; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 995; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 996; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 997; CHECK: loop.body: 998; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4 999; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00 1000; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1001; CHECK: then: 1002; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1003; CHECK-NEXT: br label [[LOOP_LATCH]] 1004; CHECK: loop.latch: 
1005; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1006; CHECK-NEXT: br label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]] 1007; CHECK: exit: 1008; CHECK-NEXT: ret void 1009; 1010; TAILFOLD-LABEL: @scalar_predication( 1011; TAILFOLD-NEXT: entry: 1012; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1013; TAILFOLD: loop.header: 1014; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1015; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1016; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1017; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1018; TAILFOLD: loop.body: 1019; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1020; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1021; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1022; TAILFOLD: then: 1023; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1024; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1025; TAILFOLD: loop.latch: 1026; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1027; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1028; TAILFOLD: exit: 1029; TAILFOLD-NEXT: ret void 1030; 1031entry: 1032 br label %loop.header 1033 1034loop.header: 1035 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1036 %gep = getelementptr float, float* %addr, i64 %iv 1037 %exitcond.not = icmp eq i64 %iv, 200 1038 br i1 %exitcond.not, label %exit, label %loop.body 1039 1040loop.body: 1041 %0 = load float, float* %gep, align 4 1042 %pred = fcmp oeq float %0, 0.0 1043 br i1 %pred, label %loop.latch, label %then 1044 1045then: 1046 store float 10.0, float* %gep, align 4 1047 br label %loop.latch 1048 1049loop.latch: 1050 %iv.next = add nuw nsw i64 %iv, 1 1051 br label %loop.header 1052 1053exit: 1054 ret void 1055} 1056 1057define i32 @me_reduction(i32* %addr) { 1058; CHECK-LABEL: @me_reduction( 1059; CHECK-NEXT: entry: 
1060; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1061; CHECK: vector.ph: 1062; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1063; CHECK: vector.body: 1064; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1065; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1066; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1067; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1068; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1069; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]] 1070; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 1071; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 1072; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 1073; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1074; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 1075; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1076; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1077; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 1078; CHECK: middle.block: 1079; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) 1080; CHECK-NEXT: br label [[SCALAR_PH]] 1081; CHECK: scalar.ph: 1082; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1083; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1084; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1085; CHECK: loop.header: 1086; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1087; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ 
[[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1088; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1089; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1090; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1091; CHECK: loop.latch: 1092; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 1093; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 1094; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1095; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1096; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] 1097; CHECK: exit: 1098; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1099; CHECK-NEXT: ret i32 [[LCSSA]] 1100; 1101; TAILFOLD-LABEL: @me_reduction( 1102; TAILFOLD-NEXT: entry: 1103; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1104; TAILFOLD: loop.header: 1105; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1106; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1107; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1108; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1109; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1110; TAILFOLD: loop.latch: 1111; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1112; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1113; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1114; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1115; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1116; TAILFOLD: exit: 1117; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1118; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1119; 1120entry: 1121 br label %loop.header 1122 
1123loop.header: 1124 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1125 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1126 %gep = getelementptr i32, i32* %addr, i64 %iv 1127 %exitcond.not = icmp eq i64 %iv, 200 1128 br i1 %exitcond.not, label %exit, label %loop.latch 1129 1130loop.latch: 1131 %0 = load i32, i32* %gep, align 4 1132 %accum.next = add i32 %accum, %0 1133 %iv.next = add nuw nsw i64 %iv, 1 1134 %exitcond2.not = icmp eq i64 %iv, 400 1135 br i1 %exitcond2.not, label %exit, label %loop.header 1136 1137exit: 1138 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1139 ret i32 %lcssa 1140} 1141 1142; TODO: The current definition of reduction is too strict, we can vectorize 1143; this. There's an analogous single exit case where we extract the N-1 1144; value of the reduction that we can also handle. If we fix the latter, the 1145; multiple exit case probably falls out. 1146define i32 @me_reduction2(i32* %addr) { 1147; CHECK-LABEL: @me_reduction2( 1148; CHECK-NEXT: entry: 1149; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1150; CHECK: loop.header: 1151; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1152; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1153; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1154; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1155; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1156; CHECK: loop.latch: 1157; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1158; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1159; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1160; CHECK-NEXT: br label [[LOOP_HEADER]] 1161; CHECK: exit: 1162; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1163; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1164; 1165; TAILFOLD-LABEL: @me_reduction2( 1166; TAILFOLD-NEXT: entry: 1167; 
TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1168; TAILFOLD: loop.header: 1169; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1170; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1171; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1172; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1173; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1174; TAILFOLD: loop.latch: 1175; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1176; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1177; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1178; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1179; TAILFOLD: exit: 1180; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1181; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1182; 1183entry: 1184 br label %loop.header 1185 1186loop.header: 1187 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1188 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1189 %gep = getelementptr i32, i32* %addr, i64 %iv 1190 %exitcond.not = icmp eq i64 %iv, 200 1191 br i1 %exitcond.not, label %exit, label %loop.latch 1192 1193loop.latch: 1194 %0 = load i32, i32* %gep, align 4 1195 %accum.next = add i32 %accum, %0 1196 %iv.next = add nuw nsw i64 %iv, 1 1197 br label %loop.header 1198 1199exit: 1200 ret i32 %accum 1201} 1202 1203