; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s

; Tests for PR54266.
; In each function below, a value computed in the loop is used after the loop
; through an LCSSA phi in %exit. The assertions expect the vectorized code to
; feed that phi from lane 3 of the corresponding vector value, extracted in
; middle.block.

; The header reaches the latch through one unconditional branch, and the latch
; forwards %xor through a single-incoming phi.
define i32 @one_direct_branch(ptr %src) {
; CHECK-LABEL: @one_direct_branch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4
; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %src.gep = getelementptr inbounds i32, ptr %src, i32 %iv
  %lv = load i32, ptr %src.gep
  %xor = xor i32 25500, %lv
  br label %loop.latch

loop.latch:
  %phi.xor = phi i32 [ %xor, %loop ]
  %iv.next = add nsw i32 %iv, 1
  %tobool.not = icmp eq i32 %iv.next, 1000
  br i1 %tobool.not, label %exit, label %loop

exit:
  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
  ret i32 %xor.lcssa
}

; Same as @one_direct_branch, but with an extra block %bb between the header
; and the latch, so %xor passes through two chained single-incoming phis.
define i32 @two_direct_branch(ptr %src) {
; CHECK-LABEL: @two_direct_branch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4
; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[PHI_XOR_1:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ]
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[PHI_XOR_1]], [[BB]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
;
entry:
  br label %loop

loop:                                             ; preds = %loop.latch, %entry
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %src.gep = getelementptr inbounds i32, ptr %src, i32 %iv
  %lv = load i32, ptr %src.gep
  %xor = xor i32 25500, %lv
  br label %bb

bb:
  %phi.xor.1 = phi i32 [ %xor, %loop ]
  br label %loop.latch

loop.latch:
  %phi.xor = phi i32 [ %phi.xor.1, %bb ]
  %iv.next = add nsw i32 %iv, 1
  %tobool.not = icmp eq i32 %iv.next, 1000
  br i1 %tobool.not, label %exit, label %loop

exit:
  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
  ret i32 %xor.lcssa
}

; The latch phi merges %xor (from the header) with the constant 10 (from
; %then), chosen by comparing %iv against %a. The assertions expect the phi to
; become a vector select whose lane 3 feeds the exit phi.
define i32 @cond_branch(i32 %a, ptr %src) {
; CHECK-LABEL: @cond_branch(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 10), <4 x i32> [[TMP3]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[SRC_GEP]], align 4
; CHECK-NEXT:    [[XOR:%.*]] = xor i32 25500, [[LV]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[IV]], [[A]]
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_LATCH]], label [[THEN:%.*]]
; CHECK:       then:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[PHI_XOR:%.*]] = phi i32 [ [[XOR]], [[LOOP]] ], [ 10, [[THEN]] ]
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[PHI_XOR]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[XOR_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %src.gep = getelementptr inbounds i32, ptr %src, i32 %iv
  %lv = load i32, ptr %src.gep
  %xor = xor i32 25500, %lv
  %cmp = icmp ne i32 %iv, %a
  br i1 %cmp, label %loop.latch, label %then

then:
  br label %loop.latch

loop.latch:
  %phi.xor = phi i32 [ %xor, %loop ], [ 10, %then ]
  %iv.next = add nsw i32 %iv, 1
  %tobool.not = icmp eq i32 %iv.next, 1000
  br i1 %tobool.not, label %exit, label %loop

exit:
  %xor.lcssa = phi i32 [ %phi.xor, %loop.latch ]
  ret i32 %xor.lcssa
}

; Test case for PR54370.
; TODO: Should either compute the final value of the truncated IV independent
; of loop or scalarize the vector IV.
; The i64 induction variable is truncated to i32 inside the loop, and the
; truncated value is used only by the LCSSA phi in %exit. The assertions
; currently expect a <4 x i32> vector induction to be kept in the vector loop,
; with its lane 3 extracted in middle.block to feed the exit phi.
define i32 @optimizable_trunc_used_outside() {
; CHECK-LABEL: @optimizable_trunc_used_outside(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT_I_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[IV_TRUNC_LCSSA:%.*]] = phi i32 [ [[IV_TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[IV_TRUNC_LCSSA]]
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %iv.trunc = trunc i64 %iv to i32
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not.i.i = icmp eq i64 %iv.next, 1000
  br i1 %exitcond.not.i.i, label %exit, label %loop

exit:
  %iv.trunc.lcssa = phi i32 [ %iv.trunc, %loop ]
  ret i32 %iv.trunc.lcssa
}