1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s 3; RUN: opt -S -loop-vectorize -force-vector-width=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s 4 5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 6 7define void @bottom_tested(i16* %p, i32 %n) { 8; CHECK-LABEL: @bottom_tested( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 11; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 12; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 13; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 14; CHECK: vector.ph: 15; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 16; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] 17; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 18; CHECK: vector.body: 19; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 20; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 21; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 22; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]] 23; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 24; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>* 25; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4 26; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 27; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 28; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 29; CHECK: middle.block: 30; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 31; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 32; CHECK: scalar.ph: 33; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY:%.*]] ] 34; CHECK-NEXT: br label [[FOR_COND:%.*]] 35; CHECK: for.cond: 36; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 37; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 38; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 39; CHECK-NEXT: store i16 0, i16* [[B]], align 4 40; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 41; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 42; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 43; CHECK: if.end: 44; CHECK-NEXT: ret void 45; 46; TAILFOLD-LABEL: @bottom_tested( 47; TAILFOLD-NEXT: entry: 48; TAILFOLD-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 49; TAILFOLD-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 50; TAILFOLD-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 51; TAILFOLD: vector.ph: 52; TAILFOLD-NEXT: [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1 53; TAILFOLD-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2 54; TAILFOLD-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] 55; TAILFOLD-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1 56; TAILFOLD-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0 57; TAILFOLD-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer 58; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] 59; TAILFOLD: vector.body: 60; TAILFOLD-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 61; TAILFOLD-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 62; TAILFOLD-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0 63; TAILFOLD-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 1 64; TAILFOLD-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] 65; TAILFOLD-NEXT: [[TMP4:%.*]] = sext 
<2 x i32> [[VEC_IND]] to <2 x i64> 66; TAILFOLD-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 67; TAILFOLD-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 68; TAILFOLD: pred.store.if: 69; TAILFOLD-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0 70; TAILFOLD-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]] 71; TAILFOLD-NEXT: store i16 0, i16* [[TMP7]], align 4 72; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE]] 73; TAILFOLD: pred.store.continue: 74; TAILFOLD-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 75; TAILFOLD-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 76; TAILFOLD: pred.store.if1: 77; TAILFOLD-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1 78; TAILFOLD-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]] 79; TAILFOLD-NEXT: store i16 0, i16* [[TMP10]], align 4 80; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] 81; TAILFOLD: pred.store.continue2: 82; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 83; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 84; TAILFOLD-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 85; TAILFOLD-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] 86; TAILFOLD: middle.block: 87; TAILFOLD-NEXT: br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]] 88; TAILFOLD: scalar.ph: 89; TAILFOLD-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 90; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 91; TAILFOLD: for.cond: 92; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ] 93; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 94; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 95; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 96; TAILFOLD-NEXT: 
[[INC]] = add nsw i32 [[I]], 1 97; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 98; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]] 99; TAILFOLD: if.end: 100; TAILFOLD-NEXT: ret void 101; 102entry: 103 br label %for.cond 104 105for.cond: 106 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 107 %iprom = sext i32 %i to i64 108 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 109 store i16 0, i16* %b, align 4 110 %inc = add nsw i32 %i, 1 111 %cmp = icmp slt i32 %i, %n 112 br i1 %cmp, label %for.cond, label %if.end 113 114if.end: 115 ret void 116} 117 118define void @early_exit(i16* %p, i32 %n) { 119; CHECK-LABEL: @early_exit( 120; CHECK-NEXT: entry: 121; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 122; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1 123; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 124; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 125; CHECK: vector.ph: 126; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 127; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 128; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 129; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 130; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 131; CHECK: vector.body: 132; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 133; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 134; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 135; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 136; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 137; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 138; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 139; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 140; 
CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 141; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 142; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 143; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 144; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]] 145; CHECK: middle.block: 146; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 147; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 148; CHECK: scalar.ph: 149; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 150; CHECK-NEXT: br label [[FOR_COND:%.*]] 151; CHECK: for.cond: 152; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 153; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 154; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 155; CHECK: for.body: 156; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 157; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 158; CHECK-NEXT: store i16 0, i16* [[B]], align 4 159; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 160; CHECK-NEXT: br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]] 161; CHECK: if.end: 162; CHECK-NEXT: ret void 163; 164; TAILFOLD-LABEL: @early_exit( 165; TAILFOLD-NEXT: entry: 166; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 167; TAILFOLD: for.cond: 168; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 169; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 170; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 171; TAILFOLD: for.body: 172; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 173; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 174; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 175; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 
176; TAILFOLD-NEXT: br label [[FOR_COND]] 177; TAILFOLD: if.end: 178; TAILFOLD-NEXT: ret void 179; 180entry: 181 br label %for.cond 182 183for.cond: 184 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 185 %cmp = icmp slt i32 %i, %n 186 br i1 %cmp, label %for.body, label %if.end 187 188for.body: 189 %iprom = sext i32 %i to i64 190 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 191 store i16 0, i16* %b, align 4 192 %inc = add nsw i32 %i, 1 193 br label %for.cond 194 195if.end: 196 ret void 197} 198 199; Same as early_exit, but marked optsize to prevent the use of 200; a scalar epilogue. The loop is left unvectorized under both RUN lines (default and tail-folding). 201define void @optsize(i16* %p, i32 %n) optsize { 202; CHECK-LABEL: @optsize( 203; CHECK-NEXT: entry: 204; CHECK-NEXT: br label [[FOR_COND:%.*]] 205; CHECK: for.cond: 206; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 207; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 208; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 209; CHECK: for.body: 210; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 211; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 212; CHECK-NEXT: store i16 0, i16* [[B]], align 4 213; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 214; CHECK-NEXT: br label [[FOR_COND]] 215; CHECK: if.end: 216; CHECK-NEXT: ret void 217; 218; TAILFOLD-LABEL: @optsize( 219; TAILFOLD-NEXT: entry: 220; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 221; TAILFOLD: for.cond: 222; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 223; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 224; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 225; TAILFOLD: for.body: 226; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 227; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 228; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 229; TAILFOLD-NEXT: 
[[INC]] = add nsw i32 [[I]], 1 230; TAILFOLD-NEXT: br label [[FOR_COND]] 231; TAILFOLD: if.end: 232; TAILFOLD-NEXT: ret void 233; 234entry: 235 br label %for.cond 236 237for.cond: 238 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 239 %cmp = icmp slt i32 %i, %n 240 br i1 %cmp, label %for.body, label %if.end 241 242for.body: 243 %iprom = sext i32 %i to i64 244 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 245 store i16 0, i16* %b, align 4 246 %inc = add nsw i32 %i, 1 247 br label %for.cond 248 249if.end: 250 ret void 251} 252 253 254; multiple exit - no values inside the loop used outside 255define void @multiple_unique_exit(i16* %p, i32 %n) { 256; CHECK-LABEL: @multiple_unique_exit( 257; CHECK-NEXT: entry: 258; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 259; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 260; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 261; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 262; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 263; CHECK: vector.ph: 264; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 265; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 266; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 267; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 268; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 269; CHECK: vector.body: 270; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 271; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 272; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 273; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 274; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 275; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 276; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* 
[[TMP6]], i32 0 277; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 278; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 279; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 280; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 281; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 282; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]] 283; CHECK: middle.block: 284; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 285; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 286; CHECK: scalar.ph: 287; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 288; CHECK-NEXT: br label [[FOR_COND:%.*]] 289; CHECK: for.cond: 290; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 291; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 292; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 293; CHECK: for.body: 294; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 295; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 296; CHECK-NEXT: store i16 0, i16* [[B]], align 4 297; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 298; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 299; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]] 300; CHECK: if.end: 301; CHECK-NEXT: ret void 302; 303; TAILFOLD-LABEL: @multiple_unique_exit( 304; TAILFOLD-NEXT: entry: 305; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 306; TAILFOLD: for.cond: 307; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 308; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 309; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 310; TAILFOLD: for.body: 311; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 312; 
TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 313; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 314; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 315; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 316; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 317; TAILFOLD: if.end: 318; TAILFOLD-NEXT: ret void 319; 320entry: 321 br label %for.cond 322 323for.cond: 324 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 325 %cmp = icmp slt i32 %i, %n 326 br i1 %cmp, label %for.body, label %if.end 327 328for.body: 329 %iprom = sext i32 %i to i64 330 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 331 store i16 0, i16* %b, align 4 332 %inc = add nsw i32 %i, 1 333 %cmp2 = icmp slt i32 %i, 2096 334 br i1 %cmp2, label %for.cond, label %if.end 335 336if.end: 337 ret void 338} 339 340; multiple exit - with an lcssa phi 341define i32 @multiple_unique_exit2(i16* %p, i32 %n) { 342; CHECK-LABEL: @multiple_unique_exit2( 343; CHECK-NEXT: entry: 344; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 345; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 346; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 347; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 348; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 349; CHECK: vector.ph: 350; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 351; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 352; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 353; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 354; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 355; CHECK: vector.body: 356; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 357; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 358; CHECK-NEXT: [[TMP3:%.*]] = add 
i32 [[INDEX]], 0 359; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 360; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 361; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 362; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 363; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 364; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 365; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 366; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 367; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 368; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] 369; CHECK: middle.block: 370; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 371; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1 372; CHECK-NEXT: [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1 373; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 374; CHECK: scalar.ph: 375; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 376; CHECK-NEXT: br label [[FOR_COND:%.*]] 377; CHECK: for.cond: 378; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 379; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 380; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]] 381; CHECK: for.body: 382; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 383; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 384; CHECK-NEXT: store i16 0, i16* [[B]], align 4 385; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 386; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 387; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]] 388; CHECK: if.end: 389; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ 
[[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ] 390; CHECK-NEXT: ret i32 [[I_LCSSA]] 391; 392; TAILFOLD-LABEL: @multiple_unique_exit2( 393; TAILFOLD-NEXT: entry: 394; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 395; TAILFOLD: for.cond: 396; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 397; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 398; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 399; TAILFOLD: for.body: 400; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 401; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 402; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 403; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 404; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 405; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 406; TAILFOLD: if.end: 407; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ] 408; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 409; 410entry: 411 br label %for.cond 412 413for.cond: 414 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 415 %cmp = icmp slt i32 %i, %n 416 br i1 %cmp, label %for.body, label %if.end 417 418for.body: 419 %iprom = sext i32 %i to i64 420 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 421 store i16 0, i16* %b, align 4 422 %inc = add nsw i32 %i, 1 423 %cmp2 = icmp slt i32 %i, 2096 424 br i1 %cmp2, label %for.cond, label %if.end 425 426if.end: 427 ret i32 %i 428} 429 430; multiple exit with a non-LCSSA phi in the exit block (its incoming values are constants, not loop-defined values) 431define i32 @multiple_unique_exit3(i16* %p, i32 %n) { 432; CHECK-LABEL: @multiple_unique_exit3( 433; CHECK-NEXT: entry: 434; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0) 435; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096) 436; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 437; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2 438; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 439; CHECK: vector.ph: 440; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 441; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 442; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]] 443; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]] 444; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 445; CHECK: vector.body: 446; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 447; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 448; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 449; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 450; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP3]] to i64 451; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]] 452; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 453; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>* 454; CHECK-NEXT: store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4 455; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 456; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2> 457; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] 458; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]] 459; CHECK: middle.block: 460; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] 461; CHECK-NEXT: br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]] 462; CHECK: scalar.ph: 463; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 464; CHECK-NEXT: br label [[FOR_COND:%.*]] 465; CHECK: for.cond: 466; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 467; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N]] 468; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label 
[[IF_END]] 469; CHECK: for.body: 470; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 471; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]] 472; CHECK-NEXT: store i16 0, i16* [[B]], align 4 473; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 474; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 475; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]] 476; CHECK: if.end: 477; CHECK-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ] 478; CHECK-NEXT: ret i32 [[EXIT]] 479; 480; TAILFOLD-LABEL: @multiple_unique_exit3( 481; TAILFOLD-NEXT: entry: 482; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 483; TAILFOLD: for.cond: 484; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 485; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 486; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 487; TAILFOLD: for.body: 488; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 489; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 490; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 491; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 492; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 493; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]] 494; TAILFOLD: if.end: 495; TAILFOLD-NEXT: [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ] 496; TAILFOLD-NEXT: ret i32 [[EXIT]] 497; 498entry: 499 br label %for.cond 500 501for.cond: 502 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 503 %cmp = icmp slt i32 %i, %n 504 br i1 %cmp, label %for.body, label %if.end 505 506for.body: 507 %iprom = sext i32 %i to i64 508 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 509 store i16 0, i16* %b, align 4 510 %inc = add nsw i32 %i, 1 511 %cmp2 = icmp slt i32 %i, 2096 512 br i1 %cmp2, label %for.cond, label %if.end 513 514if.end: 515 %exit = phi i32 [0, 
%for.cond], [1, %for.body] 516 ret i32 %exit 517} 518 519; multiple exits w/distinct target blocks 520define i32 @multiple_exit_blocks(i16* %p, i32 %n) { 521; CHECK-LABEL: @multiple_exit_blocks( 522; CHECK-NEXT: entry: 523; CHECK-NEXT: br label [[FOR_COND:%.*]] 524; CHECK: for.cond: 525; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 526; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 527; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 528; CHECK: for.body: 529; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 530; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 531; CHECK-NEXT: store i16 0, i16* [[B]], align 4 532; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 533; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 534; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 535; CHECK: if.end: 536; CHECK-NEXT: ret i32 0 537; CHECK: if.end2: 538; CHECK-NEXT: ret i32 1 539; 540; TAILFOLD-LABEL: @multiple_exit_blocks( 541; TAILFOLD-NEXT: entry: 542; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 543; TAILFOLD: for.cond: 544; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 545; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 546; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 547; TAILFOLD: for.body: 548; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 549; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 550; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 551; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 552; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 553; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 554; TAILFOLD: if.end: 555; TAILFOLD-NEXT: ret i32 0 556; TAILFOLD: if.end2: 557; TAILFOLD-NEXT: ret i32 1 558; 559entry: 560 br label %for.cond 561 562for.cond: 563 %i = phi i32 [ 0, %entry ], [ %inc, 
%for.body ] 564 %cmp = icmp slt i32 %i, %n 565 br i1 %cmp, label %for.body, label %if.end 566 567for.body: 568 %iprom = sext i32 %i to i64 569 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 570 store i16 0, i16* %b, align 4 571 %inc = add nsw i32 %i, 1 572 %cmp2 = icmp slt i32 %i, 2096 573 br i1 %cmp2, label %for.cond, label %if.end2 574 575if.end: 576 ret i32 0 577 578if.end2: 579 ret i32 1 580} 581 582; LCSSA, common value each exit 583define i32 @multiple_exit_blocks2(i16* %p, i32 %n) { 584; CHECK-LABEL: @multiple_exit_blocks2( 585; CHECK-NEXT: entry: 586; CHECK-NEXT: br label [[FOR_COND:%.*]] 587; CHECK: for.cond: 588; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 589; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 590; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 591; CHECK: for.body: 592; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 593; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 594; CHECK-NEXT: store i16 0, i16* [[B]], align 4 595; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 596; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 597; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 598; CHECK: if.end: 599; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 600; CHECK-NEXT: ret i32 [[I_LCSSA]] 601; CHECK: if.end2: 602; CHECK-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 603; CHECK-NEXT: ret i32 [[I_LCSSA1]] 604; 605; TAILFOLD-LABEL: @multiple_exit_blocks2( 606; TAILFOLD-NEXT: entry: 607; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 608; TAILFOLD: for.cond: 609; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 610; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 611; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 612; TAILFOLD: for.body: 613; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 614; TAILFOLD-NEXT: 
[[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 615; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 616; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 617; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 618; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 619; TAILFOLD: if.end: 620; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 621; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 622; TAILFOLD: if.end2: 623; TAILFOLD-NEXT: [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ] 624; TAILFOLD-NEXT: ret i32 [[I_LCSSA1]] 625; 626entry: 627 br label %for.cond 628 629for.cond: 630 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 631 %cmp = icmp slt i32 %i, %n 632 br i1 %cmp, label %for.body, label %if.end 633 634for.body: 635 %iprom = sext i32 %i to i64 636 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 637 store i16 0, i16* %b, align 4 638 %inc = add nsw i32 %i, 1 639 %cmp2 = icmp slt i32 %i, 2096 640 br i1 %cmp2, label %for.cond, label %if.end2 641 642if.end: 643 ret i32 %i 644 645if.end2: 646 ret i32 %i 647} 648 649; LCSSA, distinct value each exit 650define i32 @multiple_exit_blocks3(i16* %p, i32 %n) { 651; CHECK-LABEL: @multiple_exit_blocks3( 652; CHECK-NEXT: entry: 653; CHECK-NEXT: br label [[FOR_COND:%.*]] 654; CHECK: for.cond: 655; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 656; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 657; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 658; CHECK: for.body: 659; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 660; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 661; CHECK-NEXT: store i16 0, i16* [[B]], align 4 662; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 663; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 664; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 665; CHECK: if.end: 666; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ 
[[I]], [[FOR_COND]] ] 667; CHECK-NEXT: ret i32 [[I_LCSSA]] 668; CHECK: if.end2: 669; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 670; CHECK-NEXT: ret i32 [[INC_LCSSA]] 671; 672; TAILFOLD-LABEL: @multiple_exit_blocks3( 673; TAILFOLD-NEXT: entry: 674; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 675; TAILFOLD: for.cond: 676; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] 677; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]] 678; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]] 679; TAILFOLD: for.body: 680; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 681; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 682; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 683; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 684; TAILFOLD-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 2096 685; TAILFOLD-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]] 686; TAILFOLD: if.end: 687; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ] 688; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 689; TAILFOLD: if.end2: 690; TAILFOLD-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] 691; TAILFOLD-NEXT: ret i32 [[INC_LCSSA]] 692; 693entry: 694 br label %for.cond 695 696for.cond: 697 %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] 698 %cmp = icmp slt i32 %i, %n 699 br i1 %cmp, label %for.body, label %if.end 700 701for.body: 702 %iprom = sext i32 %i to i64 703 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 704 store i16 0, i16* %b, align 4 705 %inc = add nsw i32 %i, 1 706 %cmp2 = icmp slt i32 %i, 2096 707 br i1 %cmp2, label %for.cond, label %if.end2 708 709if.end: 710 ret i32 %i 711 712if.end2: 713 ret i32 %inc 714} 715 716; unique exit case, but reached through a switch: two edges between the same pair of 717; blocks is an often-missed edge case. 718define i32 @multiple_exit_switch(i16* %p, i32 %n) { 719; CHECK-LABEL: @multiple_exit_switch( 720; 
CHECK-NEXT: entry: 721; CHECK-NEXT: br label [[FOR_COND:%.*]] 722; CHECK: for.cond: 723; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 724; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 725; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 726; CHECK-NEXT: store i16 0, i16* [[B]], align 4 727; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 728; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 729; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 730; CHECK-NEXT: i32 2097, label [[IF_END]] 731; CHECK-NEXT: ] 732; CHECK: if.end: 733; CHECK-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 734; CHECK-NEXT: ret i32 [[I_LCSSA]] 735; 736; TAILFOLD-LABEL: @multiple_exit_switch( 737; TAILFOLD-NEXT: entry: 738; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 739; TAILFOLD: for.cond: 740; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 741; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 742; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 743; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 744; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 745; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 746; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 747; TAILFOLD-NEXT: i32 2097, label [[IF_END]] 748; TAILFOLD-NEXT: ] 749; TAILFOLD: if.end: 750; TAILFOLD-NEXT: [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ] 751; TAILFOLD-NEXT: ret i32 [[I_LCSSA]] 752; 753entry: 754 br label %for.cond 755 756for.cond: 757 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 758 %iprom = sext i32 %i to i64 759 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 760 store i16 0, i16* %b, align 4 761 %inc = add nsw i32 %i, 1 762 switch i32 %i, label %for.cond [ 763 i32 2096, label %if.end 764 i32 2097, label %if.end 765 ] 766 767if.end: 768 ret i32 %i 769} 770 771; multiple exit case but with a switch as 
multiple exiting edges from 772; a single block is a commonly missed edge case 773define i32 @multiple_exit_switch2(i16* %p, i32 %n) { 774; CHECK-LABEL: @multiple_exit_switch2( 775; CHECK-NEXT: entry: 776; CHECK-NEXT: br label [[FOR_COND:%.*]] 777; CHECK: for.cond: 778; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 779; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 780; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 781; CHECK-NEXT: store i16 0, i16* [[B]], align 4 782; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 783; CHECK-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 784; CHECK-NEXT: i32 2096, label [[IF_END:%.*]] 785; CHECK-NEXT: i32 2097, label [[IF_END2:%.*]] 786; CHECK-NEXT: ] 787; CHECK: if.end: 788; CHECK-NEXT: ret i32 0 789; CHECK: if.end2: 790; CHECK-NEXT: ret i32 1 791; 792; TAILFOLD-LABEL: @multiple_exit_switch2( 793; TAILFOLD-NEXT: entry: 794; TAILFOLD-NEXT: br label [[FOR_COND:%.*]] 795; TAILFOLD: for.cond: 796; TAILFOLD-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ] 797; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I]] to i64 798; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 799; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 800; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I]], 1 801; TAILFOLD-NEXT: switch i32 [[I]], label [[FOR_COND]] [ 802; TAILFOLD-NEXT: i32 2096, label [[IF_END:%.*]] 803; TAILFOLD-NEXT: i32 2097, label [[IF_END2:%.*]] 804; TAILFOLD-NEXT: ] 805; TAILFOLD: if.end: 806; TAILFOLD-NEXT: ret i32 0 807; TAILFOLD: if.end2: 808; TAILFOLD-NEXT: ret i32 1 809; 810entry: 811 br label %for.cond 812 813for.cond: 814 %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ] 815 %iprom = sext i32 %i to i64 816 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 817 store i16 0, i16* %b, align 4 818 %inc = add nsw i32 %i, 1 819 switch i32 %i, label %for.cond [ 820 i32 2096, label %if.end 821 i32 2097, label %if.end2 
822 ] 823 824if.end: 825 ret i32 0 826 827if.end2: 828 ret i32 1 829} 830 831define i32 @multiple_latch1(i16* %p) { 832; CHECK-LABEL: @multiple_latch1( 833; CHECK-NEXT: entry: 834; CHECK-NEXT: br label [[FOR_BODY:%.*]] 835; CHECK: for.body: 836; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 837; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 838; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 839; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 840; CHECK: for.second: 841; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 842; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 843; CHECK-NEXT: store i16 0, i16* [[B]], align 4 844; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 845; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 846; CHECK: for.body.backedge: 847; CHECK-NEXT: br label [[FOR_BODY]] 848; CHECK: for.end: 849; CHECK-NEXT: ret i32 0 850; 851; TAILFOLD-LABEL: @multiple_latch1( 852; TAILFOLD-NEXT: entry: 853; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 854; TAILFOLD: for.body: 855; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 856; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 857; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 858; TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 859; TAILFOLD: for.second: 860; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 861; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 862; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 863; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 864; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 865; TAILFOLD: for.body.backedge: 866; TAILFOLD-NEXT: br label [[FOR_BODY]] 867; TAILFOLD: for.end: 868; TAILFOLD-NEXT: ret i32 0 869; 870entry: 871 br 
label %for.body 872 873for.body: 874 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge] 875 %inc = add nsw i32 %i.02, 1 876 %cmp = icmp slt i32 %inc, 16 877 br i1 %cmp, label %for.body.backedge, label %for.second 878 879for.second: 880 %iprom = sext i32 %i.02 to i64 881 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 882 store i16 0, i16* %b, align 4 883 %cmps = icmp sgt i32 %inc, 16 884 br i1 %cmps, label %for.body.backedge, label %for.end 885 886for.body.backedge: 887 br label %for.body 888 889for.end: 890 ret i32 0 891} 892 893 894; two back branches - loop simplify will convert this to the same form 895; as previous before vectorizer sees it, but show that. 896define i32 @multiple_latch2(i16* %p) { 897; CHECK-LABEL: @multiple_latch2( 898; CHECK-NEXT: entry: 899; CHECK-NEXT: br label [[FOR_BODY:%.*]] 900; CHECK: for.body: 901; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 902; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 903; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 904; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 905; CHECK: for.body.backedge: 906; CHECK-NEXT: br label [[FOR_BODY]] 907; CHECK: for.second: 908; CHECK-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 909; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 910; CHECK-NEXT: store i16 0, i16* [[B]], align 4 911; CHECK-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 912; CHECK-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 913; CHECK: for.end: 914; CHECK-NEXT: ret i32 0 915; 916; TAILFOLD-LABEL: @multiple_latch2( 917; TAILFOLD-NEXT: entry: 918; TAILFOLD-NEXT: br label [[FOR_BODY:%.*]] 919; TAILFOLD: for.body: 920; TAILFOLD-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ] 921; TAILFOLD-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 922; TAILFOLD-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 16 923;
TAILFOLD-NEXT: br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]] 924; TAILFOLD: for.body.backedge: 925; TAILFOLD-NEXT: br label [[FOR_BODY]] 926; TAILFOLD: for.second: 927; TAILFOLD-NEXT: [[IPROM:%.*]] = sext i32 [[I_02]] to i64 928; TAILFOLD-NEXT: [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]] 929; TAILFOLD-NEXT: store i16 0, i16* [[B]], align 4 930; TAILFOLD-NEXT: [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16 931; TAILFOLD-NEXT: br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]] 932; TAILFOLD: for.end: 933; TAILFOLD-NEXT: ret i32 0 934; 935entry: 936 br label %for.body 937 938for.body: 939 %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second] 940 %inc = add nsw i32 %i.02, 1 941 %cmp = icmp slt i32 %inc, 16 942 br i1 %cmp, label %for.body, label %for.second 943 944for.second: 945 %iprom = sext i32 %i.02 to i64 946 %b = getelementptr inbounds i16, i16* %p, i64 %iprom 947 store i16 0, i16* %b, align 4 948 %cmps = icmp sgt i32 %inc, 16 949 br i1 %cmps, label %for.body, label %for.end 950 951for.end: 952 ret i32 0 953} 954 955 956; Check interaction between block predication and early exits. We need the 957; condition on the early exit to remain dead (i.e. not be used when forming 958; the predicate mask). 
959define void @scalar_predication(float* %addr) { 960; CHECK-LABEL: @scalar_predication( 961; CHECK-NEXT: entry: 962; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 963; CHECK: vector.ph: 964; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 965; CHECK: vector.body: 966; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] 967; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] 968; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 969; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]] 970; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0 971; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>* 972; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4 973; CHECK-NEXT: [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 974; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true> 975; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 976; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 977; CHECK: pred.store.if: 978; CHECK-NEXT: store float 1.000000e+01, float* [[TMP1]], align 4 979; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] 980; CHECK: pred.store.continue: 981; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 982; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] 983; CHECK: pred.store.if1: 984; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 985; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]] 986; CHECK-NEXT: store float 1.000000e+01, float* [[TMP9]], align 4 987; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] 988; CHECK: pred.store.continue2: 989; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 990; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x 
i64> [[VEC_IND]], <i64 2, i64 2> 991; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 992; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] 993; CHECK: middle.block: 994; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 995; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 996; CHECK: scalar.ph: 997; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 998; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 999; CHECK: loop.header: 1000; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1001; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]] 1002; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1003; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]] 1004; CHECK: loop.body: 1005; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[GEP]], align 4 1006; CHECK-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00 1007; CHECK-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1008; CHECK: then: 1009; CHECK-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1010; CHECK-NEXT: br label [[LOOP_LATCH]] 1011; CHECK: loop.latch: 1012; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1013; CHECK-NEXT: br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]] 1014; CHECK: exit: 1015; CHECK-NEXT: ret void 1016; 1017; TAILFOLD-LABEL: @scalar_predication( 1018; TAILFOLD-NEXT: entry: 1019; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1020; TAILFOLD: loop.header: 1021; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1022; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]] 1023; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1024; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]] 1025; TAILFOLD: loop.body: 
1026; TAILFOLD-NEXT: [[TMP0:%.*]] = load float, float* [[GEP]], align 4 1027; TAILFOLD-NEXT: [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00 1028; TAILFOLD-NEXT: br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]] 1029; TAILFOLD: then: 1030; TAILFOLD-NEXT: store float 1.000000e+01, float* [[GEP]], align 4 1031; TAILFOLD-NEXT: br label [[LOOP_LATCH]] 1032; TAILFOLD: loop.latch: 1033; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1034; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1035; TAILFOLD: exit: 1036; TAILFOLD-NEXT: ret void 1037; 1038entry: 1039 br label %loop.header 1040 1041loop.header: 1042 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1043 %gep = getelementptr float, float* %addr, i64 %iv 1044 %exitcond.not = icmp eq i64 %iv, 200 1045 br i1 %exitcond.not, label %exit, label %loop.body 1046 1047loop.body: 1048 %0 = load float, float* %gep, align 4 1049 %pred = fcmp oeq float %0, 0.0 1050 br i1 %pred, label %loop.latch, label %then 1051 1052then: 1053 store float 10.0, float* %gep, align 4 1054 br label %loop.latch 1055 1056loop.latch: 1057 %iv.next = add nuw nsw i64 %iv, 1 1058 br label %loop.header 1059 1060exit: 1061 ret void 1062} 1063 1064define i32 @me_reduction(i32* %addr) { 1065; CHECK-LABEL: @me_reduction( 1066; CHECK-NEXT: entry: 1067; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1068; CHECK: vector.ph: 1069; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1070; CHECK: vector.body: 1071; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1072; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1073; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] 1074; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1075; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 1076; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], 
i64 [[TMP0]] 1077; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0 1078; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>* 1079; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4 1080; CHECK-NEXT: [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 1081; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 1082; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2> 1083; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1084; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] 1085; CHECK: middle.block: 1086; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]]) 1087; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 201, 200 1088; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] 1089; CHECK: scalar.ph: 1090; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1091; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1092; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1093; CHECK: loop.header: 1094; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1095; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1096; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]] 1097; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1098; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]] 1099; CHECK: loop.latch: 1100; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4 1101; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]] 1102; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1103; CHECK-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1104; CHECK-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop 
!.*]] 1105; CHECK: exit: 1106; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] 1107; CHECK-NEXT: ret i32 [[LCSSA]] 1108; 1109; TAILFOLD-LABEL: @me_reduction( 1110; TAILFOLD-NEXT: entry: 1111; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1112; TAILFOLD: loop.header: 1113; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1114; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1115; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1116; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1117; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1118; TAILFOLD: loop.latch: 1119; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1120; TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1121; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1122; TAILFOLD-NEXT: [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400 1123; TAILFOLD-NEXT: br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]] 1124; TAILFOLD: exit: 1125; TAILFOLD-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ] 1126; TAILFOLD-NEXT: ret i32 [[LCSSA]] 1127; 1128entry: 1129 br label %loop.header 1130 1131loop.header: 1132 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1133 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1134 %gep = getelementptr i32, i32* %addr, i64 %iv 1135 %exitcond.not = icmp eq i64 %iv, 200 1136 br i1 %exitcond.not, label %exit, label %loop.latch 1137 1138loop.latch: 1139 %0 = load i32, i32* %gep, align 4 1140 %accum.next = add i32 %accum, %0 1141 %iv.next = add nuw nsw i64 %iv, 1 1142 %exitcond2.not = icmp eq i64 %iv, 400 1143 br i1 %exitcond2.not, label %exit, label %loop.header 1144 1145exit: 1146 %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch] 1147 ret 
i32 %lcssa 1148} 1149 1150; TODO: The current definition of reduction is too strict, we can vectorize 1151; this. There's an analogous single exit case where we extract the N-1 1152; value of the reduction that we can also handle. If we fix the latter, the 1153; multiple exit case probably falls out. 1154define i32 @me_reduction2(i32* %addr) { 1155; CHECK-LABEL: @me_reduction2( 1156; CHECK-NEXT: entry: 1157; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] 1158; CHECK: loop.header: 1159; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1160; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1161; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1162; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1163; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1164; CHECK: loop.latch: 1165; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1166; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1167; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1168; CHECK-NEXT: br label [[LOOP_HEADER]] 1169; CHECK: exit: 1170; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1171; CHECK-NEXT: ret i32 [[ACCUM_LCSSA]] 1172; 1173; TAILFOLD-LABEL: @me_reduction2( 1174; TAILFOLD-NEXT: entry: 1175; TAILFOLD-NEXT: br label [[LOOP_HEADER:%.*]] 1176; TAILFOLD: loop.header: 1177; TAILFOLD-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] 1178; TAILFOLD-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ] 1179; TAILFOLD-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]] 1180; TAILFOLD-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200 1181; TAILFOLD-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]] 1182; TAILFOLD: loop.latch: 1183; TAILFOLD-NEXT: [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4 1184;
TAILFOLD-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]] 1185; TAILFOLD-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 1186; TAILFOLD-NEXT: br label [[LOOP_HEADER]] 1187; TAILFOLD: exit: 1188; TAILFOLD-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ] 1189; TAILFOLD-NEXT: ret i32 [[ACCUM_LCSSA]] 1190; 1191entry: 1192 br label %loop.header 1193 1194loop.header: 1195 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] 1196 %accum = phi i32 [0, %entry], [%accum.next, %loop.latch] 1197 %gep = getelementptr i32, i32* %addr, i64 %iv 1198 %exitcond.not = icmp eq i64 %iv, 200 1199 br i1 %exitcond.not, label %exit, label %loop.latch 1200 1201loop.latch: 1202 %0 = load i32, i32* %gep, align 4 1203 %accum.next = add i32 %accum, %0 1204 %iv.next = add nuw nsw i64 %iv, 1 1205 br label %loop.header 1206 1207exit: 1208 ret i32 %accum 1209} 1210 1211