; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop-vectorize,dce,instcombine,loop-mssa(licm)' -force-vector-width=4 -S | FileCheck %s

; The first licm pass is there to hoist/sink invariant stores if possible.
; Today LICM does not hoist/sink invariant stores. Even if that changes, we
; should still vectorize these loops in case licm is not run.

; The second licm pass, after vectorization, hoists/sinks loop-invariant
; instructions.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; All tests check that it is legal to vectorize stores to an invariant
; address.


; The memory check is: found.conflict = b[max(n-1,1)] > a && (ptr a)+1 > (ptr b)

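; A rough C equivalent of the first test below (illustrative only; the IR
; loop is a do-while, so the body runs at least once):
;   int sum = 0;
;   for (long i = 0; i < n; i++) { sum += b[i]; *a = (int)n; }
;   return sum;
; so the memory check has to prove that the 4 bytes stored at a do not
; overlap the bytes loaded from b.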

define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) {
; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT:    [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT:    [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I0:%.*]] = phi i32 [ [[I3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
; CHECK-NEXT:    [[I3]] = add i32 [[I0]], [[I2]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    [[I3_LCSSA:%.*]] = phi i32 [ [[I3]], [[FOR_BODY]] ]
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[I4:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[I3_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[I4]]
;
entry:
  %ntrunc = trunc i64 %n to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i0 = phi i32 [ %i3, %for.body ], [ 0, %entry ]
  %i1 = getelementptr inbounds i32, ptr %b, i64 %i
  %i2 = load i32, ptr %i1, align 8
  %i3 = add i32 %i0, %i2
  store i32 %ntrunc, ptr %a
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  %i4 = phi i32 [ %i3, %for.body ]
  ret i32 %i4
}
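
; A rough C equivalent of the next test (illustrative only):
;   for (long i = 0; i < n; i++) { *a = (int)n; b[i] = (int)n; }
; Both the stored value and the invariant store address are loop invariant.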

define void @inv_val_store_to_inv_address(ptr %a, i64 %n, ptr %b) {
; CHECK-LABEL: @inv_val_store_to_inv_address(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT:    [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
; CHECK-NEXT:    store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META12]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %ntrunc = trunc i64 %n to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i1 = getelementptr inbounds i32, ptr %b, i64 %i
  %i2 = load i32, ptr %i1, align 8
  store i32 %ntrunc, ptr %a
  store i32 %ntrunc, ptr %i1
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}


; Both of these tests below are handled as predicated stores.

; Conditional store
; if (b[i] == k) a = ntrunc
; TODO: We could do better on code gen for the first test: a single scalar
; store would suffice when vector.reduce.or(vector_cmp(b[i] == k)) is 1.
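; A sketch of that TODO in IR (illustrative; this is not what is generated
; today, and %wide.load/%k.splat are placeholder names):
;   %cmp = icmp eq <4 x i32> %wide.load, %k.splat
;   %any = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp)
;   br i1 %any, label %do.store, label %continue
; i.e. one guarded scalar store per vector iteration instead of four
; extractelement/branch/store sequences.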

define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT:    [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[N]], 2
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 4
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META16:![0-9]+]], !noalias [[META19:![0-9]+]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT:    store <4 x i32> [[BROADCAST_SPLAT4]], ptr [[TMP1]], align 4, !alias.scope [[META16]], !noalias [[META19]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]]
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK:       pred.store.if5:
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]]
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.continue6:
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK:       pred.store.if7:
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]]
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
; CHECK:       pred.store.continue8:
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
; CHECK:       pred.store.if9:
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]]
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
; CHECK:       pred.store.continue10:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I2]], [[K]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
; CHECK-NEXT:    br i1 [[CMP]], label [[COND_STORE:%.*]], label [[LATCH]]
; CHECK:       cond_store:
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %ntrunc = trunc i64 %n to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ %i.next, %latch ], [ 0, %entry ]
  %i1 = getelementptr inbounds i32, ptr %b, i64 %i
  %i2 = load i32, ptr %i1, align 8
  %cmp = icmp eq i32 %i2, %k
  store i32 %ntrunc, ptr %i1
  br i1 %cmp, label %cond_store, label %latch

cond_store:
  store i32 %ntrunc, ptr %a
  br label %latch

latch:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; if (b[i] == k)
;    a = ntrunc
; else a = k;
; TODO: We could vectorize this once we support multiple uniform stores to the
; same address.
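; Note: in the checks below, instcombine has already sunk the two conditional
; stores into a single store of a merged phi (%storemerge) in the latch; the
; loop is nevertheless left unvectorized.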
define void @inv_val_store_to_inv_address_conditional_diff_values(ptr %a, i64 %n, ptr %b, i32 %k) {
; CHECK-LABEL: @inv_val_store_to_inv_address_conditional_diff_values(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[I]]
; CHECK-NEXT:    [[I2:%.*]] = load i32, ptr [[I1]], align 8
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[I2]], [[K:%.*]]
; CHECK-NEXT:    store i32 [[NTRUNC]], ptr [[I1]], align 4
; CHECK-NEXT:    br i1 [[CMP]], label [[COND_STORE:%.*]], label [[COND_STORE_K:%.*]]
; CHECK:       cond_store:
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       cond_store_k:
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ [[K]], [[COND_STORE_K]] ], [ [[NTRUNC]], [[COND_STORE]] ]
; CHECK-NEXT:    store i32 [[STOREMERGE]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %ntrunc = trunc i64 %n to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i = phi i64 [ %i.next, %latch ], [ 0, %entry ]
  %i1 = getelementptr inbounds i32, ptr %b, i64 %i
  %i2 = load i32, ptr %i1, align 8
  %cmp = icmp eq i32 %i2, %k
  store i32 %ntrunc, ptr %i1
  br i1 %cmp, label %cond_store, label %cond_store_k

cond_store:
  store i32 %ntrunc, ptr %a
  br label %latch

cond_store_k:
  store i32 %k, ptr %a
  br label %latch

latch:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}

; Multiple variant stores to the same uniform address
; for(; i < itr; i++) {
;   for(; j < itr; j++) {
;     var1[i] = var2[j] + var1[i];
;     var1[i]++;
;   }
; }
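; In the checks below, the loads/stores of var1[i] have already been promoted
; to a register (ARRAYIDX5_PROMOTED), so the inner loop carries a plain add
; reduction that the vectorizer handles; the store back to var1[i] is sunk out
; of the vector loop and performed once in middle.block.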

define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 {
; CHECK-LABEL: @multiple_uniform_stores(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
; CHECK:       for.cond1.preheader.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ITR]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 4
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[VAR2:%.*]], i64 4
; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK:       for.cond1.preheader:
; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
; CHECK-NEXT:    [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
; CHECK:       for.body3.lr.ph:
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1]], i64 [[INDVARS_IV23]]
; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[J_022]] to i64
; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[J_022]], -1
; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[ITR]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP6]], 3
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw nsw i64 [[TMP4]], 2
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[VAR2]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[J_022]], -1
; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[ITR]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = add nuw nsw i64 [[TMP4]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 [[TMP14]]
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[VAR1]], [[SCEVGEP3]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP8]], 8589934588
; CHECK-NEXT:    [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP4]]
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARRAYIDX5_PROMOTED]], i64 0
; CHECK-NEXT:    [[INVARIANT_GEP:%.*]] = getelementptr i32, ptr [[VAR2]], i64 [[TMP4]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP15]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 4, !alias.scope [[META23:![0-9]+]]
; CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP17]] = add <4 x i32> [[TMP16]], splat (i32 1)
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
; CHECK-NEXT:    store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]]
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VECTOR_MEMCHECK]] ], [ [[TMP4]], [[FOR_BODY3_LR_PH]] ]
; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
; CHECK:       for.body3:
; CHECK-NEXT:    [[TMP20:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
; CHECK-NEXT:    [[TMP22]] = add nsw i32 [[ADD]], 1
; CHECK-NEXT:    store i32 [[TMP22]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK:       for.inc8.loopexit.loopexit:
; CHECK-NEXT:    br label [[FOR_INC8_LOOPEXIT]]
; CHECK:       for.inc8.loopexit:
; CHECK-NEXT:    br label [[FOR_INC8]]
; CHECK:       for.inc8:
; CHECK-NEXT:    [[J_1_LCSSA]] = phi i32 [ [[J_022]], [[FOR_COND1_PREHEADER]] ], [ [[ITR]], [[FOR_INC8_LOOPEXIT]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV25:%.*]] = trunc i64 [[INDVARS_IV_NEXT24]] to i32
; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV25]]
; CHECK-NEXT:    br i1 [[EXITCOND26]], label [[FOR_END10_LOOPEXIT:%.*]], label [[FOR_COND1_PREHEADER]]
; CHECK:       for.end10.loopexit:
; CHECK-NEXT:    br label [[FOR_END10]]
; CHECK:       for.end10:
; CHECK-NEXT:    ret i32 undef
;
entry:
  %cmp20 = icmp eq i32 %itr, 0
  br i1 %cmp20, label %for.end10, label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %entry, %for.inc8
  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
  %cmp218 = icmp ult i32 %j.022, %itr
  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8

for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
  %arrayidx5 = getelementptr inbounds i32, ptr %var1, i64 %indvars.iv23
  %0 = zext i32 %j.022 to i64
  br label %for.body3

for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
  %arrayidx = getelementptr inbounds i32, ptr %var2, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx, align 4
  %2 = load i32, ptr %arrayidx5, align 4
  %add = add nsw i32 %2, %1
  store i32 %add, ptr %arrayidx5, align 4
  %3 = load i32, ptr %arrayidx5, align 4
  %4 = add nsw i32 %3, 1
  store i32 %4, ptr %arrayidx5, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %itr
  br i1 %exitcond, label %for.inc8, label %for.body3

for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %for.body3 ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader

for.end10:                                        ; preds = %for.inc8, %entry
  ret i32 undef
}

; The second uniform store to the same address is conditional;
; we do not vectorize this case.
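; A rough C sketch of the inner loop below (illustrative; the compare is
; unsigned in the IR):
;   for (; j < itr; j++) {
;     var1[i] += var2[j];
;     if (var1[i] > 42) var1[i]++;   /* the conditional second store */
;   }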
define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 {
; CHECK-LABEL: @multiple_uniform_stores_conditional(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]]
; CHECK:       for.cond1.preheader.preheader:
; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
; CHECK:       for.cond1.preheader:
; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
; CHECK-NEXT:    [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ]
; CHECK-NEXT:    [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]]
; CHECK-NEXT:    br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]]
; CHECK:       for.body3.lr.ph:
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]]
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[J_022]] to i64
; CHECK-NEXT:    [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
; CHECK:       for.body3:
; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[ADD]], 42
; CHECK-NEXT:    br i1 [[TMP3]], label [[COND_STORE:%.*]], label [[LATCH]]
; CHECK:       cond_store:
; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i32 [[ADD]], 1
; CHECK-NEXT:    br label [[LATCH]]
; CHECK:       latch:
; CHECK-NEXT:    [[TMP5]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ]
; CHECK-NEXT:    store i32 [[TMP5]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[FOR_BODY3]]
; CHECK:       for.inc8.loopexit:
; CHECK-NEXT:    br label [[FOR_INC8]]
; CHECK:       for.inc8:
; CHECK-NEXT:    [[J_1_LCSSA]] = phi i32 [ [[J_022]], [[FOR_COND1_PREHEADER]] ], [ [[ITR]], [[FOR_INC8_LOOPEXIT]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV25:%.*]] = trunc i64 [[INDVARS_IV_NEXT24]] to i32
; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV25]]
; CHECK-NEXT:    br i1 [[EXITCOND26]], label [[FOR_END10_LOOPEXIT:%.*]], label [[FOR_COND1_PREHEADER]]
; CHECK:       for.end10.loopexit:
; CHECK-NEXT:    br label [[FOR_END10]]
; CHECK:       for.end10:
; CHECK-NEXT:    ret i32 undef
;
entry:
  %cmp20 = icmp eq i32 %itr, 0
  br i1 %cmp20, label %for.end10, label %for.cond1.preheader

for.cond1.preheader:                              ; preds = %entry, %for.inc8
  %indvars.iv23 = phi i64 [ %indvars.iv.next24, %for.inc8 ], [ 0, %entry ]
  %j.022 = phi i32 [ %j.1.lcssa, %for.inc8 ], [ 0, %entry ]
  %cmp218 = icmp ult i32 %j.022, %itr
  br i1 %cmp218, label %for.body3.lr.ph, label %for.inc8

for.body3.lr.ph:                                  ; preds = %for.cond1.preheader
  %arrayidx5 = getelementptr inbounds i32, ptr %var1, i64 %indvars.iv23
  %0 = zext i32 %j.022 to i64
  br label %for.body3

for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
  %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %latch ]
  %arrayidx = getelementptr inbounds i32, ptr %var2, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx, align 4
  %2 = load i32, ptr %arrayidx5, align 4
  %add = add nsw i32 %2, %1
  store i32 %add, ptr %arrayidx5, align 4
  %3 = load i32, ptr %arrayidx5, align 4
  %4 = add nsw i32 %3, 1
  %5 = icmp ugt i32 %3, 42
  br i1 %5, label %cond_store, label %latch

cond_store:
  store i32 %4, ptr %arrayidx5, align 4
  br label %latch

latch:
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %itr
  br i1 %exitcond, label %for.inc8, label %for.body3

for.inc8:                                         ; preds = %for.body3, %for.cond1.preheader
  %j.1.lcssa = phi i32 [ %j.022, %for.cond1.preheader ], [ %itr, %latch ]
  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
  %lftr.wideiv25 = trunc i64 %indvars.iv.next24 to i32
  %exitcond26 = icmp eq i32 %lftr.wideiv25, %itr
  br i1 %exitcond26, label %for.end10, label %for.cond1.preheader

for.end10:                                        ; preds = %for.inc8, %entry
  ret i32 undef
}

; We cannot vectorize this loop: there is an unsafe dependency between the
; uniform load (%i10) and the store (%i12) to the same address.
; PR39653
; Note: %i10 could be replaced by phi(%arg4, %i12), a potentially vectorizable
; first-order recurrence.
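; Sketch of that rewrite (illustrative only, not part of the test):
;   %i10 = phi i32 [ %arg4, %bb ], [ %i12, %bb7 ]
; replacing the load from %i and making the recurrence explicit.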
define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, ptr %arg3, i32 %arg4, i64 %arg5) {
; CHECK-LABEL: @unsafe_dep_uniform_load_store(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[I6:%.*]] = getelementptr inbounds i16, ptr [[ARG3:%.*]], i64 [[ARG5:%.*]]
; CHECK-NEXT:    br label [[BB7:%.*]]
; CHECK:       bb7:
; CHECK-NEXT:    [[I121:%.*]] = phi i32 [ [[ARG4:%.*]], [[BB:%.*]] ], [ [[I12:%.*]], [[BB7]] ]
; CHECK-NEXT:    [[I8:%.*]] = phi i64 [ 0, [[BB]] ], [ [[I24:%.*]], [[BB7]] ]
; CHECK-NEXT:    [[I9:%.*]] = phi i32 [ [[ARG1:%.*]], [[BB]] ], [ [[I23:%.*]], [[BB7]] ]
; CHECK-NEXT:    [[I11:%.*]] = mul nsw i32 [[I9]], [[I121]]
; CHECK-NEXT:    [[I12]] = srem i32 [[I11]], 65536
; CHECK-NEXT:    [[I13:%.*]] = add nsw i32 [[I12]], [[I9]]
; CHECK-NEXT:    [[I14:%.*]] = trunc i32 [[I13]] to i16
; CHECK-NEXT:    [[I15:%.*]] = trunc i64 [[I8]] to i32
; CHECK-NEXT:    [[I16:%.*]] = add i32 [[ARG:%.*]], [[I15]]
; CHECK-NEXT:    [[I17:%.*]] = zext i32 [[I16]] to i64
; CHECK-NEXT:    [[I18:%.*]] = getelementptr inbounds nuw i16, ptr [[I6]], i64 [[I17]]
; CHECK-NEXT:    store i16 [[I14]], ptr [[I18]], align 2
; CHECK-NEXT:    [[I19:%.*]] = add i32 [[I13]], [[I9]]
; CHECK-NEXT:    [[I20:%.*]] = trunc i32 [[I19]] to i16
; CHECK-NEXT:    [[I21:%.*]] = and i16 [[I20]], 255
; CHECK-NEXT:    [[I22:%.*]] = getelementptr inbounds nuw i16, ptr [[ARG3]], i64 [[I17]]
; CHECK-NEXT:    store i16 [[I21]], ptr [[I22]], align 2
; CHECK-NEXT:    [[I23]] = add nsw i32 [[I9]], 1
; CHECK-NEXT:    [[I24]] = add nuw nsw i64 [[I8]], 1
; CHECK-NEXT:    [[I25:%.*]] = icmp eq i64 [[I24]], [[ARG2:%.*]]
; CHECK-NEXT:    br i1 [[I25]], label [[BB26:%.*]], label [[BB7]]
; CHECK:       bb26:
; CHECK-NEXT:    ret void
;
bb:
  %i = alloca i32
  store i32 %arg4, ptr %i
  %i6 = getelementptr inbounds i16, ptr %arg3, i64 %arg5
  br label %bb7

bb7:
  %i8 = phi i64 [ 0, %bb ], [ %i24, %bb7 ]
  %i9 = phi i32 [ %arg1, %bb ], [ %i23, %bb7 ]
  %i10 = load i32, ptr %i
  %i11 = mul nsw i32 %i9, %i10
  %i12 = srem i32 %i11, 65536
  %i13 = add nsw i32 %i12, %i9
  %i14 = trunc i32 %i13 to i16
  %i15 = trunc i64 %i8 to i32
  %i16 = add i32 %arg, %i15
  %i17 = zext i32 %i16 to i64
  %i18 = getelementptr inbounds i16, ptr %i6, i64 %i17
  store i16 %i14, ptr %i18, align 2
  %i19 = add i32 %i13, %i9
  %i20 = trunc i32 %i19 to i16
  %i21 = and i16 %i20, 255
  %i22 = getelementptr inbounds i16, ptr %arg3, i64 %i17
  store i16 %i21, ptr %i22, align 2
  %i23 = add nsw i32 %i9, 1
  %i24 = add nuw nsw i64 %i8, 1
  %i25 = icmp eq i64 %i24, %arg2
  store i32 %i12, ptr %i
  br i1 %i25, label %bb26, label %bb7

bb26:
  ret void
}

; Make sure any check-not directives are not triggered by function declarations.