1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s 3 4target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" 5 6; Check vectorization on an interleaved load group of factor 2 and an interleaved 7; store group of factor 2. 8 9; int AB[1024]; 10; int CD[1024]; 11; void test_array_load2_store2(int C, int D) { 12; for (int i = 0; i < 1024; i+=2) { 13; int A = AB[i]; 14; int B = AB[i+1]; 15; CD[i] = A + C; 16; CD[i+1] = B * D; 17; } 18; } 19 20 21@AB = common global [1024 x i32] zeroinitializer, align 4 22@CD = common global [1024 x i32] zeroinitializer, align 4 23 24define void @test_array_load2_store2(i32 %C, i32 %D) { 25; CHECK-LABEL: @test_array_load2_store2( 26; CHECK-NEXT: entry: 27; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 28; CHECK: vector.ph: 29; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i64 0 30; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 31; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[D:%.*]], i64 0 32; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer 33; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 34; CHECK: vector.body: 35; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 36; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 37; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]] 38; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 39; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 
0, i32 2, i32 4, i32 6> 40; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 41; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] 42; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[STRIDED_VEC1]], [[BROADCAST_SPLAT3]] 43; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[OFFSET_IDX]] 44; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 45; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4 46; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 47; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 48; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 49; CHECK: middle.block: 50; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 51; CHECK: scalar.ph: 52; CHECK-NEXT: br label [[FOR_BODY:%.*]] 53; CHECK: for.body: 54; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]] 55; CHECK: for.end: 56; CHECK-NEXT: ret void 57; 58entry: 59 br label %for.body 60 61for.body: ; preds = %for.body, %entry 62 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 63 %arrayidx0 = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 %indvars.iv 64 %tmp = load i32, ptr %arrayidx0, align 4 65 %tmp1 = or disjoint i64 %indvars.iv, 1 66 %arrayidx1 = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 %tmp1 67 %tmp2 = load i32, ptr %arrayidx1, align 4 68 %add = add nsw i32 %tmp, %C 69 %mul = mul nsw i32 %tmp2, %D 70 %arrayidx2 = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 %indvars.iv 71 store i32 %add, ptr %arrayidx2, align 4 72 %arrayidx3 = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 %tmp1 73 store i32 %mul, ptr %arrayidx3, align 4 74 
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 75 %cmp = icmp slt i64 %indvars.iv.next, 1024 76 br i1 %cmp, label %for.body, label %for.end 77 78for.end: ; preds = %for.body 79 ret void 80} 81 82; int A[3072]; 83; struct ST3 S[1024]; 84; void test_struct_array_load3_store3() { 85; int *ptr = A; 86; for (int i = 0; i < 1024; i++) { 87; int X1 = *ptr++; 88; int X2 = *ptr++; 89; int X3 = *ptr++; 90; S[i].x = X1 + 1; 91; S[i].y = X2 + 2; 92; S[i].z = X3 + 3; 93; } 94; } 95 96 97%struct.ST3 = type { i32, i32, i32 } 98@A = common global [3072 x i32] zeroinitializer, align 4 99@S = common global [1024 x %struct.ST3] zeroinitializer, align 4 100 101define void @test_struct_array_load3_store3() { 102; CHECK-LABEL: @test_struct_array_load3_store3( 103; CHECK-NEXT: entry: 104; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 105; CHECK: vector.ph: 106; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 107; CHECK: vector.body: 108; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 109; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 12 110; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr @A, i64 [[OFFSET_IDX]] 111; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[NEXT_GEP]], align 4 112; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 113; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 114; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 115; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) 116; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]], i32 0 117; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], splat (i32 2) 118; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> 
[[STRIDED_VEC3]], splat (i32 3) 119; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 120; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 121; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> 122; CHECK-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[TMP3]], align 4 123; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 124; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 125; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 126; CHECK: middle.block: 127; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 128; CHECK: scalar.ph: 129; CHECK-NEXT: br label [[FOR_BODY:%.*]] 130; CHECK: for.body: 131; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 132; CHECK: for.end: 133; CHECK-NEXT: ret void 134; 135entry: 136 br label %for.body 137 138for.body: ; preds = %for.body, %entry 139 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 140 %ptr.016 = phi ptr [ @A, %entry ], [ %incdec.ptr2, %for.body ] 141 %incdec.ptr = getelementptr inbounds i32, ptr %ptr.016, i64 1 142 %tmp = load i32, ptr %ptr.016, align 4 143 %incdec.ptr1 = getelementptr inbounds i32, ptr %ptr.016, i64 2 144 %tmp1 = load i32, ptr %incdec.ptr, align 4 145 %incdec.ptr2 = getelementptr inbounds i32, ptr %ptr.016, i64 3 146 %tmp2 = load i32, ptr %incdec.ptr1, align 4 147 %add = add nsw i32 %tmp, 1 148 %x = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 %indvars.iv, i32 0 149 store i32 %add, ptr %x, align 4 150 %add3 = add nsw i32 %tmp1, 2 151 %y = getelementptr inbounds [1024 x 
%struct.ST3], ptr @S, i64 0, i64 %indvars.iv, i32 1 152 store i32 %add3, ptr %y, align 4 153 %add6 = add nsw i32 %tmp2, 3 154 %z = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 %indvars.iv, i32 2 155 store i32 %add6, ptr %z, align 4 156 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 157 %exitcond = icmp eq i64 %indvars.iv.next, 1024 158 br i1 %exitcond, label %for.end, label %for.body 159 160for.end: ; preds = %for.body 161 ret void 162} 163 164; Check vectorization on an interleaved load group of factor 4. 165 166; struct ST4{ 167; int x; 168; int y; 169; int z; 170; int w; 171; }; 172; int test_struct_load4(struct ST4 *S) { 173; int r = 0; 174; for (int i = 0; i < 1024; i++) { 175; r += S[i].x; 176; r -= S[i].y; 177; r += S[i].z; 178; r -= S[i].w; 179; } 180; return r; 181; } 182 183%struct.ST4 = type { i32, i32, i32, i32 } 184 185define i32 @test_struct_load4(ptr nocapture readonly %S) { 186; 187; CHECK-LABEL: @test_struct_load4( 188; CHECK-NEXT: entry: 189; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 190; CHECK: vector.ph: 191; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 192; CHECK: vector.body: 193; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 194; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] 195; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[S:%.*]], i64 [[INDEX]], i32 0 196; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP0]], align 4 197; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 198; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 199; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 200; 
CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 201; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI]] 202; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[STRIDED_VEC2]] 203; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC3]] 204; CHECK-NEXT: [[TMP4]] = sub <4 x i32> [[TMP2]], [[TMP3]] 205; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 206; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 207; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 208; CHECK: middle.block: 209; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) 210; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 211; CHECK: scalar.ph: 212; CHECK-NEXT: br label [[FOR_BODY:%.*]] 213; CHECK: for.body: 214; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 215; CHECK: for.end: 216; CHECK-NEXT: [[SUB8_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] 217; CHECK-NEXT: ret i32 [[SUB8_LCSSA]] 218; 219entry: 220 br label %for.body 221 222for.body: ; preds = %for.body, %entry 223 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 224 %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ] 225 %x = getelementptr inbounds %struct.ST4, ptr %S, i64 %indvars.iv, i32 0 226 %tmp = load i32, ptr %x, align 4 227 %add = add nsw i32 %tmp, %r.022 228 %y = getelementptr inbounds %struct.ST4, ptr %S, i64 %indvars.iv, i32 1 229 %tmp1 = load i32, ptr %y, align 4 230 %sub = sub i32 %add, %tmp1 231 %z = getelementptr inbounds %struct.ST4, ptr %S, i64 %indvars.iv, i32 2 232 %tmp2 = load i32, ptr %z, align 4 233 %add5 = add nsw i32 %sub, %tmp2 234 %w = getelementptr inbounds %struct.ST4, ptr %S, i64 %indvars.iv, i32 3 235 %tmp3 = load i32, ptr %w, align 4 236 %sub8 = sub i32 %add5, 
%tmp3 237 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 238 %exitcond = icmp eq i64 %indvars.iv.next, 1024 239 br i1 %exitcond, label %for.end, label %for.body 240 241for.end: ; preds = %for.body 242 ret i32 %sub8 243} 244 245; Check vectorization on an interleaved store group of factor 4. 246 247; void test_struct_store4(int *A, struct ST4 *B) { 248; int *ptr = A; 249; for (int i = 0; i < 1024; i++) { 250; int X = *ptr++; 251; B[i].x = X + 1; 252; B[i].y = X * 2; 253; B[i].z = X + 3; 254; B[i].w = X + 4; 255; } 256; } 257 258 259define void @test_struct_store4(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) { 260; CHECK-LABEL: @test_struct_store4( 261; CHECK-NEXT: entry: 262; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 263; CHECK: vector.ph: 264; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 265; CHECK: vector.body: 266; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 267; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 268; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] 269; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4 270; CHECK-NEXT: [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) 271; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[B:%.*]], i64 [[INDEX]], i32 0 272; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1) 273; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 3) 274; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 4) 275; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 276; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 277; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> 
[[TMP5]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> 278; CHECK-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4 279; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 280; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 281; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 282; CHECK: middle.block: 283; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 284; CHECK: scalar.ph: 285; CHECK-NEXT: br label [[FOR_BODY:%.*]] 286; CHECK: for.cond.cleanup: 287; CHECK-NEXT: ret void 288; CHECK: for.body: 289; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 290; 291entry: 292 br label %for.body 293 294for.cond.cleanup: ; preds = %for.body 295 ret void 296 297for.body: ; preds = %for.body, %entry 298 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 299 %ptr.024 = phi ptr [ %A, %entry ], [ %incdec.ptr, %for.body ] 300 %incdec.ptr = getelementptr inbounds i32, ptr %ptr.024, i64 1 301 %tmp = load i32, ptr %ptr.024, align 4 302 %add = add nsw i32 %tmp, 1 303 %x = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 0 304 store i32 %add, ptr %x, align 4 305 %mul = shl nsw i32 %tmp, 1 306 %y = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 1 307 store i32 %mul, ptr %y, align 4 308 %add3 = add nsw i32 %tmp, 3 309 %z = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 2 310 store i32 %add3, ptr %z, align 4 311 %add6 = add nsw i32 %tmp, 4 312 %w = getelementptr inbounds %struct.ST4, ptr %B, i64 %indvars.iv, i32 3 313 store i32 %add6, ptr %w, align 4 314 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 315 %exitcond = icmp eq i64 %indvars.iv.next, 1024 316 br i1 %exitcond, label %for.cond.cleanup, label %for.body 317} 318 319; Check vectorization 
on a reverse interleaved load group of factor 2 and 320; a reverse interleaved store group of factor 2. 321 322; struct ST2 { 323; int x; 324; int y; 325; }; 326; 327; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) { 328; for (int i = 1023; i >= 0; i--) { 329; int a = A[i].x + i; // interleaved load of index 0 330; int b = A[i].y - i; // interleaved load of index 1 331; B[i].x = a; // interleaved store of index 0 332; B[i].y = b; // interleaved store of index 1 333; } 334; } 335 336 337%struct.ST2 = type { i32, i32 } 338 339define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) { 340; CHECK-LABEL: @test_reversed_load2_store2( 341; CHECK-NEXT: entry: 342; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 343; CHECK: vector.ph: 344; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 345; CHECK: vector.body: 346; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 347; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 348; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] 349; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]], i32 0 350; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -24 351; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 352; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 353; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 354; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 355; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 
2, i32 1, i32 0> 356; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]] 357; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]] 358; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]], i32 0 359; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 -24 360; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 361; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 362; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 363; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4 364; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 365; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) 366; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 367; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 368; CHECK: middle.block: 369; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] 370; CHECK: scalar.ph: 371; CHECK-NEXT: br label [[FOR_BODY:%.*]] 372; CHECK: for.cond.cleanup: 373; CHECK-NEXT: ret void 374; CHECK: for.body: 375; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP11:![0-9]+]] 376; 377entry: 378 br label %for.body 379 380for.cond.cleanup: ; preds = %for.body 381 ret void 382 383for.body: ; preds = %for.body, %entry 384 %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ] 385 %x = getelementptr inbounds %struct.ST2, ptr %A, i64 %indvars.iv, i32 0 386 %tmp = load i32, ptr %x, align 4 387 %tmp1 = trunc i64 %indvars.iv to i32 388 %add = add nsw i32 %tmp, %tmp1 389 %y = getelementptr inbounds %struct.ST2, ptr %A, 
i64 %indvars.iv, i32 1 390 %tmp2 = load i32, ptr %y, align 4 391 %sub = sub nsw i32 %tmp2, %tmp1 392 %x5 = getelementptr inbounds %struct.ST2, ptr %B, i64 %indvars.iv, i32 0 393 store i32 %add, ptr %x5, align 4 394 %y8 = getelementptr inbounds %struct.ST2, ptr %B, i64 %indvars.iv, i32 1 395 store i32 %sub, ptr %y8, align 4 396 %indvars.iv.next = add nsw i64 %indvars.iv, -1 397 %cmp = icmp sgt i64 %indvars.iv, 0 398 br i1 %cmp, label %for.body, label %for.cond.cleanup 399} 400 401; Check vectorization on an interleaved load group of factor 2 with 1 gap 402; (missing the load of odd elements). Because the vectorized loop would 403; speculatively access memory out-of-bounds, we must execute at least one 404; iteration of the scalar loop. 405 406; void even_load_static_tc(int *A, int *B) { 407; for (unsigned i = 0; i < 1024; i+=2) 408; B[i/2] = A[i] * 2; 409; } 410 411 412define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) { 413; CHECK-LABEL: @even_load_static_tc( 414; CHECK-NEXT: entry: 415; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 416; CHECK: vector.ph: 417; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 418; CHECK: vector.body: 419; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 420; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3 421; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] 422; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 423; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 424; CHECK-NEXT: [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) 425; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[INDEX]], 9223372036854775804 426; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP2]] 427; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4 428; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 429; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 508 430; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 431; CHECK: middle.block: 432; CHECK-NEXT: br label [[SCALAR_PH]] 433; CHECK: scalar.ph: 434; CHECK-NEXT: br label [[FOR_BODY:%.*]] 435; CHECK: for.cond.cleanup: 436; CHECK-NEXT: ret void 437; CHECK: for.body: 438; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1016, [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 439; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]] 440; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 441; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 442; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 443; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] 444; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 445; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 446; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1022 447; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP13:![0-9]+]] 448; 449entry: 450 br label %for.body 451 452for.cond.cleanup: ; preds = %for.body 453 ret void 454 455for.body: ; preds = %for.body, %entry 456 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 457 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 458 %tmp = load i32, ptr %arrayidx, align 4 459 %mul = shl nsw i32 %tmp, 1 460 %tmp1 = lshr exact i64 %indvars.iv, 1 461 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %tmp1 462 store i32 %mul, ptr %arrayidx2, align 4 463 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 464 %cmp = icmp ult i64 %indvars.iv.next, 1024 465 br i1 %cmp, label %for.body, label %for.cond.cleanup 466} 467 468; Check vectorization on an interleaved load group of factor 2 with 1 gap 469; 
(missing the load of odd elements). Because the vectorized loop would 470; speculatively access memory out-of-bounds, we must execute at least one 471; iteration of the scalar loop. 472 473; void even_load_dynamic_tc(int *A, int *B, unsigned N) { 474; for (unsigned i = 0; i < N; i+=2) 475; B[i/2] = A[i] * 2; 476; } 477 478 479define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i64 %N) { 480; CHECK-LABEL: @even_load_dynamic_tc( 481; CHECK-NEXT: entry: 482; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 9 483; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 484; CHECK: vector.ph: 485; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 486; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 487; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1 488; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 3 489; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 490; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 4, i64 [[N_MOD_VF]] 491; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] 492; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 493; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 494; CHECK: vector.body: 495; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 496; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3 497; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]] 498; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 499; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 500; CHECK-NEXT: [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1) 501; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804 502; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[TMP7]] 503; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4 504; 
CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 505; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 506; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 507; CHECK: middle.block: 508; CHECK-NEXT: br label [[SCALAR_PH]] 509; CHECK: scalar.ph: 510; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 511; CHECK-NEXT: br label [[FOR_BODY:%.*]] 512; CHECK: for.cond.cleanup: 513; CHECK-NEXT: ret void 514; CHECK: for.body: 515; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 516; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 517; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 518; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[TMP]], 1 519; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[INDVARS_IV]], 1 520; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[TMP1]] 521; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 522; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2 523; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]] 524; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]] 525; 526entry: 527 br label %for.body 528 529for.cond.cleanup: ; preds = %for.body 530 ret void 531 532for.body: ; preds = %for.body, %entry 533 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 534 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 535 %tmp = load i32, ptr %arrayidx, align 4 536 %mul = shl nsw i32 %tmp, 1 537 %tmp1 = lshr exact i64 %indvars.iv, 1 538 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %tmp1 539 store i32 %mul, ptr %arrayidx2, align 4 540 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 541 %cmp = icmp ult i64 %indvars.iv.next, %N 542 br i1 %cmp, label 
%for.body, label %for.cond.cleanup 543} 544 545; Check vectorization on a reverse interleaved load group of factor 2 with 1 546; gap and a reverse interleaved store group of factor 2. The interleaved load 547; group should be removed since it has a gap and is reverse. 548 549; struct pair { 550; int x; 551; int y; 552; }; 553; 554; void load_gap_reverse(struct pair *P1, struct pair *P2, int X) { 555; for (int i = 1023; i >= 0; i--) { 556; int a = X + i; 557; int b = P2[i].y - i; 558; P1[i].x = a; 559; P2[i].y = b; 560; } 561; } 562 563 564%pair = type { i64, i64 } 565define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture %P2, i64 %X) { 566; CHECK-LABEL: @load_gap_reverse( 567; CHECK-NEXT: entry: 568; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 569; CHECK: vector.ph: 570; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X:%.*]], i64 0 571; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer 572; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 573; CHECK: vector.body: 574; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 575; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 576; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] 577; CHECK-NEXT: [[TMP0:%.*]] = sub i64 1022, [[INDEX]] 578; CHECK-NEXT: [[TMP1:%.*]] = sub i64 1021, [[INDEX]] 579; CHECK-NEXT: [[TMP2:%.*]] = sub i64 1020, [[INDEX]] 580; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], [[VEC_IND]] 581; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P1:%.*]], i64 [[OFFSET_IDX]], i32 0 582; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[TMP0]], i32 0 583; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 
[[TMP1]], i32 0 584; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P1]], i64 [[TMP2]], i32 0 585; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2:%.*]], i64 [[OFFSET_IDX]], i32 1 586; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP0]], i32 1 587; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP1]], i32 1 588; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P2]], i64 [[TMP2]], i32 1 589; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP8]], align 8 590; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 591; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 592; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 593; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> poison, i64 [[TMP12]], i64 0 594; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i64 1 595; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i64 2 596; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i64 3 597; CHECK-NEXT: [[TMP20:%.*]] = sub nsw <4 x i64> [[TMP19]], [[VEC_IND]] 598; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0 599; CHECK-NEXT: store i64 [[TMP21]], ptr [[TMP4]], align 8 600; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1 601; CHECK-NEXT: store i64 [[TMP22]], ptr [[TMP5]], align 8 602; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2 603; CHECK-NEXT: store i64 [[TMP23]], ptr [[TMP6]], align 8 604; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3 605; CHECK-NEXT: store i64 [[TMP24]], ptr [[TMP7]], align 8 606; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[TMP20]], i64 0 607; CHECK-NEXT: store i64 [[TMP25]], ptr [[TMP8]], align 8 608; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP20]], i64 1 609; CHECK-NEXT: store i64 [[TMP26]], ptr [[TMP9]], 
align 8 610; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP20]], i64 2 611; CHECK-NEXT: store i64 [[TMP27]], ptr [[TMP10]], align 8 612; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3 613; CHECK-NEXT: store i64 [[TMP28]], ptr [[TMP11]], align 8 614; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 615; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) 616; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 617; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 618; CHECK: middle.block: 619; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] 620; CHECK: scalar.ph: 621; CHECK-NEXT: br label [[FOR_BODY:%.*]] 622; CHECK: for.body: 623; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]] 624; CHECK: for.exit: 625; CHECK-NEXT: ret void 626; 627entry: 628 br label %for.body 629 630for.body: 631 %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ] 632 %0 = add nsw i64 %X, %i 633 %1 = getelementptr inbounds %pair, ptr %P1, i64 %i, i32 0 634 %2 = getelementptr inbounds %pair, ptr %P2, i64 %i, i32 1 635 %3 = load i64, ptr %2, align 8 636 %4 = sub nsw i64 %3, %i 637 store i64 %0, ptr %1, align 8 638 store i64 %4, ptr %2, align 8 639 %i.next = add nsw i64 %i, -1 640 %cond = icmp sgt i64 %i, 0 641 br i1 %cond, label %for.body, label %for.exit 642 643for.exit: 644 ret void 645} 646 647; Check vectorization on interleaved access groups identified from mixed 648; loads/stores. 
; void mixed_load2_store2(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     B[i] = A[i] * A[i+1];
;     B[i+1] = A[i] + A[i+1];
;   }
; }

; The IR below reloads A[i] and A[i+1] after the store to B[i], so each scalar
; iteration performs four loads and two stores. The assertions expect a single
; wide <8 x i32> load of A that is de-interleaved with shufflevectors for both
; the mul and the add, followed by one interleaved <8 x i32> store to B.

define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @mixed_load2_store2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC3]]
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP19:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %tmp = load i32, ptr %arrayidx, align 4
  %tmp1 = or disjoint i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %tmp1
  %tmp2 = load i32, ptr %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  store i32 %mul, ptr %arrayidx4, align 4
  %tmp3 = load i32, ptr %arrayidx, align 4
  %tmp4 = load i32, ptr %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, ptr %B, i64 %tmp1
  store i32 %add10, ptr %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load3_store3(int *A) {
;   for (unsigned i = 0; i < 1024; i++) {
;     *A++ += i;
;     *A++ += i;
;     *A++ += i;
;   }
; }

; In the IR below A is walked through a loop-carried pointer phi (%A.addr.012)
; that advances by three i32 elements per iteration. The assertions expect the
; address to be rebuilt from the canonical index as a byte offset (index * 12),
; one <12 x i32> load split into three stride-3 vectors, and one re-interleaved
; <12 x i32> store back to the same address.

define void @mixed_load3_store3(ptr nocapture %A) {
; CHECK-LABEL: @mixed_load3_store3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 12
; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_IND]]
; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[VEC_IND]]
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi ptr [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, ptr %A.addr.012, i64 1
  %tmp = load i32, ptr %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, ptr %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %A.addr.012, i64 2
  %tmp1 = load i32, ptr %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, ptr %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, ptr %A.addr.012, i64 3
  %tmp2 = load i32, ptr %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, ptr %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on interleaved access groups whose members have
; different types.
; struct IntFloat {
;   int a;
;   float b;
; };
;
; int SA;
; float SB;
;
; void int_float_struct(struct IntFloat *A) {
;   int SumA;
;   float SumB;
;   for (unsigned i = 0; i < 1024; i++)  {
;     SumA += A[i].a;
;     SumB += A[i].b;
;   }
;   SA = SumA;
;   SB = SumB;
; }

; SumA and SumB are uninitialized in the C source above; the two reduction
; phis in the IR correspondingly start from undef. The assertions expect both
; struct fields to be fetched with a single <8 x i32> load, with the odd lanes
; (the float members) bitcast to <4 x float> before feeding the fadd reduction.
; NOTE(review): attribute group #0 is defined outside this part of the file.

%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(ptr nocapture readonly %A) #0 {
; CHECK-LABEL: @int_float_struct(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 undef, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_INTFLOAT:%.*]], ptr [[A:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[STRIDED_VEC2]] to <4 x float>
; CHECK-NEXT:    [[TMP2]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_PHI1]]
; CHECK-NEXT:    [[TMP3]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP1]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    [[ADD3_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    store i32 [[ADD_LCSSA]], ptr @SA, align 4
; CHECK-NEXT:    store float [[ADD3_LCSSA]], ptr @SB, align 4
; CHECK-NEXT:    ret void
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, ptr @SA, align 4
  store float %add3, ptr @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, ptr %A, i64 %indvars.iv, i32 0
  %tmp = load i32, ptr %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, ptr %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, ptr %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization of interleaved access groups in the presence
; of dependences (PR27626). The following tests check that we don't reorder
; dependent loads and stores when generating code for interleaved access
; groups. Stores should be scalarized because the required code motion would
; break dependences, and the remaining interleaved load groups should have
; gaps.

; PR27626_0: Ensure a strided store is not moved after a dependent (zero
; distance) strided load.

; void PR27626_0(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i].x;
;   }
; }

; The assertions expect the stores to p[i].x to stay scalar (four stores of
; z), followed by a wide <8 x i32> load from &p[i].x of which only the even
; lanes (the x members) are extracted and stored to p[i].y one at a time.
; The middle block branches unconditionally to the scalar loop, and the vector
; trip count leaves between one and four iterations for it.

%pair.i32 = type { i32, i32 }
define void @PR27626_0(ptr %p, i32 %z, i64 %n) {
; CHECK-LABEL: @PR27626_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[N]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[TMP5]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP6]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP7]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0
; CHECK-NEXT:    store i32 [[TMP13]], ptr [[TMP9]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 2
; CHECK-NEXT:    store i32 [[TMP14]], ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 4
; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP11]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 6
; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP12]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT:    [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
; CHECK-NEXT:    store i32 [[Z]], ptr [[P_I_X]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[P_I_Y]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 1
  store i32 %z, ptr %p_i.x, align 4
  %0 = load i32, ptr %p_i.x, align 4
  store i32 %0, ptr %p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_1: Ensure a strided load is not moved before a dependent (zero
; distance) strided store.
; int PR27626_1(struct pair *p, int n) {
;   int s = 0;
;   for (int i = 0; i < n; i++) {
;     p[i].y = p[i].x;
;     s += p[i].y;
;   }
;   return s;
; }

; The assertions expect a wide <8 x i32> load of the x members (only the even
; lanes are used), scalar stores of those lanes to the y members, and then a
; second wide load starting at &p[i].y that is emitted after the stores and
; feeds the add reduction.

define i32 @PR27626_1(ptr %p, i64 %n) {
; CHECK-LABEL: @PR27626_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[N]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0
; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP6]], align 4
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 2
; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP7]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 4
; CHECK-NEXT:    store i32 [[TMP12]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 6
; CHECK-NEXT:    store i32 [[TMP13]], ptr [[TMP9]], align 4
; CHECK-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP14]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[S:%.*]] = phi i32 [ [[TMP18:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT:    [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
; CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[P_I_X]], align 4
; CHECK-NEXT:    store i32 [[TMP17]], ptr [[P_I_Y]], align 4
; CHECK-NEXT:    [[TMP18]] = add nsw i32 [[TMP17]], [[S]]
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 [[TMP18]]
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %p_i.x = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 1
  %0 = load i32, ptr %p_i.x, align 4
  store i32 %0, ptr %p_i.y, align 4
  %1 = load i32, ptr %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_2: Ensure a strided store is not moved after a dependent (negative
; distance) strided load.

; void PR27626_2(struct pair *p, int z, int n) {
;   for (int i = 0; i < n; i++) {
;     p[i].x = z;
;     p[i].y = p[i - 1].x;
;   }
; }

; The assertions expect scalar stores of z to the x members, then a wide
; <8 x i32> load whose even lanes are stored to the y members one at a time.
;
; NOTE(review): %i_minus_1 below is computed with `add nuw nsw i64 %i, -1`.
; Adding -1 wraps in the unsigned sense for every %i other than 0, so the nuw
; flag makes the result poison on all iterations but the first. This appears
; to be why the expected output addresses p[i - 1].x with the loop-invariant
; byte offset -8 instead of an index-dependent address. Confirm whether nuw is
; intended here; dropping it would require regenerating the assertions.

define void @PR27626_2(ptr %p, i64 %n, i32 %z) {
; CHECK-LABEL: @PR27626_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[N]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP4]], i32 1
; CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[TMP5]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP6]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP7]], align 4
; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0
; CHECK-NEXT:    store i32 [[TMP14]], ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 2
; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP11]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 4
; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 6
; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP13]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT:    [[P_I_MINUS_1_X:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8
; CHECK-NEXT:    [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
; CHECK-NEXT:    store i32 [[Z]], ptr [[P_I_X]], align 4
; CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[P_I_MINUS_1_X]], align 4
; CHECK-NEXT:    store i32 [[TMP19]], ptr [[P_I_Y]], align 4
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %i_minus_1 = add nuw nsw i64 %i, -1
  %p_i.x = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 0
  %p_i_minus_1.x = getelementptr inbounds %pair.i32, ptr %p, i64 %i_minus_1, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 1
  store i32 %z, ptr %p_i.x, align 4
  %0 = load i32, ptr %p_i_minus_1.x, align 4
  store i32 %0, ptr %p_i.y, align 4
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; PR27626_3: Ensure a strided load is not moved before a dependent (negative
; distance) strided store.
; int PR27626_3(struct pair *p, int z, int n) {
;   int s = 0;
;   for (int i = 0; i < n; i++) {
;     p[i + 1].y = p[i].x;
;     s += p[i].y;
;   }
;   return s;
; }

; The assertions expect the x members to be read with a wide <8 x i32> load
; (only the even lanes are used) and written to p[i + 1].y with scalar stores,
; and the wide load of the y members that feeds the add reduction to be
; emitted after those stores.

define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) {
; CHECK-LABEL: @PR27626_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[N]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP5]], i32 1
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP9]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1
; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 0
; CHECK-NEXT:    store i32 [[TMP13]], ptr [[TMP6]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 2
; CHECK-NEXT:    store i32 [[TMP14]], ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 4
; CHECK-NEXT:    store i32 [[TMP15]], ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[WIDE_VEC]], i64 6
; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP12]], align 4
; CHECK-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
; CHECK-NEXT:    br label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[S:%.*]] = phi i32 [ [[TMP22:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[I_PLUS_1:%.*]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[P_I_X:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT:    [[P_I_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I]], i32 1
; CHECK-NEXT:    [[P_I_PLUS_1_Y:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[I_PLUS_1]], i32 1
; CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[P_I_X]], align 4
; CHECK-NEXT:    store i32 [[TMP20]], ptr [[P_I_PLUS_1_Y]], align 4
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[P_I_Y]], align 4
; CHECK-NEXT:    [[TMP22]] = add nsw i32 [[TMP21]], [[S]]
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret i32 [[TMP22]]
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
  %i_plus_1 = add nuw nsw i64 %i, 1
  %p_i.x = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 0
  %p_i.y = getelementptr inbounds %pair.i32, ptr %p, i64 %i, i32 1
  %p_i_plus_1.y = getelementptr inbounds %pair.i32, ptr %p, i64 %i_plus_1, i32 1
  %0 = load i32, ptr %p_i.x, align 4
  store i32 %0, ptr %p_i_plus_1.y, align 4
  %1 = load i32, ptr %p_i.y, align 4
  %2 = add nsw i32 %1, %s
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %3 = phi i32 [ %2, %for.body ]
  ret i32 %3
}

; PR27626_4: Ensure we form an interleaved group for strided stores in the
; presence of a write-after-write dependence. We create a group for
; (2) and (3) while excluding (1).
; Reading aid for @PR27626_4: the autogenerated assertions expect the four stores of %x (statement (1)) to stay scalar `store i32 [[X]]`, followed by one `store <8 x i32> [[INTERLEAVED_VEC]]` that interleaves the %y and %z splats for statements (2) and (3).
; Reading aid for @PR27626_5: the autogenerated assertions expect only scalar `store i32` of [[X]], [[Y]] and [[Z]] in vector.body, i.e. no wide interleaved store is formed.
1252 1253; void PR27626_4(int *a, int x, int y, int z, int n) { 1254; for (int i = 0; i < n; i += 2) { 1255; a[i] = x; // (1) 1256; a[i] = y; // (2) 1257; a[i + 1] = z; // (3) 1258; } 1259; } 1260 1261 1262define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { 1263; CHECK-LABEL: @PR27626_4( 1264; CHECK-NEXT: entry: 1265; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2) 1266; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 1267; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 1268; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1269; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 7 1270; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1271; CHECK: vector.ph: 1272; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 1273; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 1274; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 1275; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1276; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Z:%.*]], i64 0 1277; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer 1278; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1279; CHECK: vector.body: 1280; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1281; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 1282; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 2 1283; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[OFFSET_IDX]], 4 1284; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 6 1285; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] 1286; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 
[[TMP3]] 1287; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] 1288; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] 1289; CHECK-NEXT: store i32 [[X:%.*]], ptr [[TMP7]], align 4 1290; CHECK-NEXT: store i32 [[X]], ptr [[TMP8]], align 4 1291; CHECK-NEXT: store i32 [[X]], ptr [[TMP9]], align 4 1292; CHECK-NEXT: store i32 [[X]], ptr [[TMP10]], align 4 1293; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 1294; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4 1295; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1296; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1297; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 1298; CHECK: middle.block: 1299; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1300; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1301; CHECK: scalar.ph: 1302; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1303; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1304; CHECK: for.body: 1305; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1306; CHECK-NEXT: [[I_PLUS_1:%.*]] = or disjoint i64 [[I]], 1 1307; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] 1308; CHECK-NEXT: [[A_I_PLUS_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_PLUS_1]] 1309; CHECK-NEXT: store i32 [[Y]], ptr [[A_I]], align 4 1310; CHECK-NEXT: store i32 [[Z]], ptr [[A_I_PLUS_1]], align 4 1311; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 1312; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1313; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP33:![0-9]+]] 1314; CHECK: 
for.end: 1315; CHECK-NEXT: ret void 1316; 1317entry: 1318 br label %for.body 1319 1320for.body: 1321 %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] 1322 %i_plus_1 = add i64 %i, 1 1323 %a_i = getelementptr inbounds i32, ptr %a, i64 %i 1324 %a_i_plus_1 = getelementptr inbounds i32, ptr %a, i64 %i_plus_1 1325 store i32 %x, ptr %a_i, align 4 1326 store i32 %y, ptr %a_i, align 4 1327 store i32 %z, ptr %a_i_plus_1, align 4 1328 %i.next = add nuw nsw i64 %i, 2 1329 %cond = icmp slt i64 %i.next, %n 1330 br i1 %cond, label %for.body, label %for.end 1331 1332for.end: 1333 ret void 1334} 1335 1336; PR27626_5: Ensure we do not form an interleaved group for strided stores in 1337; the presence of a write-after-write dependence. 1338 1339; void PR27626_5(int *a, int x, int y, int z, int n) { 1340; for (int i = 3; i < n; i += 2) { 1341; a[i - 1] = x; 1342; a[i - 3] = y; 1343; a[i] = z; 1344; } 1345; } 1346 1347 1348define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) { 1349; CHECK-LABEL: @PR27626_5( 1350; CHECK-NEXT: entry: 1351; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5) 1352; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4 1353; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 1354; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 1355; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 10 1356; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1357; CHECK: vector.ph: 1358; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804 1359; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1 1360; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP3]], 3 1361; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1362; CHECK: vector.body: 1363; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1364; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 5, i64 7, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1365; 
CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1 1366; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or disjoint i64 [[TMP4]], 3 1367; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[TMP4]], 5 1368; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[TMP4]], 7 1369; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1) 1370; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3) 1371; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] 1372; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]] 1373; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]] 1374; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP4]] 1375; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 36 1376; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0 1377; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] 1378; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1 1379; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]] 1380; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2 1381; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]] 1382; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3 1383; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]] 1384; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP8]], i64 0 1385; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] 1386; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP8]], i64 1 1387; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP24]] 1388; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP8]], i64 2 1389; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]] 1390; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> 
[[TMP8]], i64 3 1391; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP28]] 1392; CHECK-NEXT: store i32 [[X:%.*]], ptr [[TMP15]], align 4 1393; CHECK-NEXT: store i32 [[X]], ptr [[TMP17]], align 4 1394; CHECK-NEXT: store i32 [[X]], ptr [[TMP19]], align 4 1395; CHECK-NEXT: store i32 [[X]], ptr [[TMP21]], align 4 1396; CHECK-NEXT: store i32 [[Y:%.*]], ptr [[TMP23]], align 4 1397; CHECK-NEXT: store i32 [[Y]], ptr [[TMP25]], align 4 1398; CHECK-NEXT: store i32 [[Y]], ptr [[TMP27]], align 4 1399; CHECK-NEXT: store i32 [[Y]], ptr [[TMP29]], align 4 1400; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[TMP9]], align 4 1401; CHECK-NEXT: store i32 [[Z]], ptr [[TMP10]], align 4 1402; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4 1403; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4 1404; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1405; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8) 1406; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1407; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] 1408; CHECK: middle.block: 1409; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 1410; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1411; CHECK: scalar.ph: 1412; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ] 1413; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1414; CHECK: for.body: 1415; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1416; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] 1417; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] 1418; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP31]], i64 -4 1419; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]] 1420; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP32]], i64 
-12 1421; CHECK-NEXT: store i32 [[X]], ptr [[A_I_MINUS_1]], align 4 1422; CHECK-NEXT: store i32 [[Y]], ptr [[A_I_MINUS_3]], align 4 1423; CHECK-NEXT: store i32 [[Z]], ptr [[A_I]], align 4 1424; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 1425; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1426; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP35:![0-9]+]] 1427; CHECK: for.end: 1428; CHECK-NEXT: ret void 1429; 1430entry: 1431 br label %for.body 1432 1433for.body: 1434 %i = phi i64 [ %i.next, %for.body ], [ 3, %entry ] 1435 %i_minus_1 = sub i64 %i, 1 1436 %i_minus_3 = sub i64 %i_minus_1, 2 1437 %a_i = getelementptr inbounds i32, ptr %a, i64 %i 1438 %a_i_minus_1 = getelementptr inbounds i32, ptr %a, i64 %i_minus_1 1439 %a_i_minus_3 = getelementptr inbounds i32, ptr %a, i64 %i_minus_3 1440 store i32 %x, ptr %a_i_minus_1, align 4 1441 store i32 %y, ptr %a_i_minus_3, align 4 1442 store i32 %z, ptr %a_i, align 4 1443 %i.next = add nuw nsw i64 %i, 2 1444 %cond = icmp slt i64 %i.next, %n 1445 br i1 %cond, label %for.body, label %for.end 1446 1447for.end: 1448 ret void 1449} 1450 1451; PR34743: Ensure that a cast which needs to sink after a load that belongs to 1452; an interleaved group, indeed gets sunk.
; Reading aid for @PR34743: in the scalar loop, %conv sign-extends the recurrence phi %0, which carries %load2 from the previous iteration; %load1 and %load2 are the i16 loads at %iv+1 and %iv+2.
; The autogenerated assertions expect a single wide `load <8 x i16>` ([[WIDE_VEC]]) in vector.body, with the sext of the recurrence shuffle ([[TMP9]]) placed after that load.
1453 1454; void PR34743(short *a, int *b, int n) { 1455; for (int i = 0, iv = 0; iv < n; i++, iv += 2) { 1456; b[i] = a[iv] * a[iv+1] * a[iv+2]; 1457; } 1458; } 1459 1460 1461define void @PR34743(ptr %a, ptr %b, i64 %n) { 1462; CHECK-LABEL: @PR34743( 1463; CHECK-NEXT: entry: 1464; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, ptr [[A:%.*]], align 2 1465; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N:%.*]], 1 1466; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[TMP0]], 1 1467; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6 1468; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] 1469; CHECK: vector.memcheck: 1470; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[N]], 1 1471; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -4 1472; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP3]] 1473; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP4]], i64 4 1474; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr nuw i8, ptr [[A]], i64 2 1475; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] 1476; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[TMP5]], i64 6 1477; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP2]] 1478; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]] 1479; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] 1480; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] 1481; CHECK: vector.ph: 1482; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], -4 1483; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 1484; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 1485; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1486; CHECK: vector.body: 1487; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1488; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STRIDED_VEC4:%.*]], [[VECTOR_BODY]] ] 1489; 
CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 1490; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1 1491; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP6]] 1492; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP7]], align 4 1493; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1494; CHECK-NEXT: [[STRIDED_VEC4]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 1495; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> 1496; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32> 1497; CHECK-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[STRIDED_VEC]] to <4 x i32> 1498; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[STRIDED_VEC4]] to <4 x i32> 1499; CHECK-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP10]] 1500; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP11]] 1501; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] 1502; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP14]], align 4, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]] 1503; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1504; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1505; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] 1506; CHECK: middle.block: 1507; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i16> [[WIDE_VEC]], i64 7 1508; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] 1509; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] 1510; CHECK: scalar.ph: 1511; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] 1512; CHECK-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] 1513; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] 1514; CHECK-NEXT: br label [[LOOP:%.*]] 1515; CHECK: loop: 1516; CHECK-NEXT: [[TMP16:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] 1517; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ] 1518; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ] 1519; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 1520; CHECK-NEXT: [[I1]] = add nuw nsw i64 [[I]], 1 1521; CHECK-NEXT: [[IV1:%.*]] = or disjoint i64 [[IV]], 1 1522; CHECK-NEXT: [[IV2]] = add nuw nsw i64 [[IV]], 2 1523; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV1]] 1524; CHECK-NEXT: [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 4 1525; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[LOAD1]] to i32 1526; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[IV2]] 1527; CHECK-NEXT: [[LOAD2]] = load i16, ptr [[GEP2]], align 4 1528; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[LOAD2]] to i32 1529; CHECK-NEXT: [[MUL01:%.*]] = mul nsw i32 [[CONV]], [[CONV1]] 1530; CHECK-NEXT: [[MUL012:%.*]] = mul nsw i32 [[MUL01]], [[CONV2]] 1531; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]] 1532; CHECK-NEXT: store i32 [[MUL012]], ptr [[ARRAYIDX5]], align 4 1533; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], [[N]] 1534; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP42:![0-9]+]] 1535; CHECK: end: 1536; CHECK-NEXT: ret void 1537; 1538entry: 1539 %.pre = load i16, ptr %a 1540 br label %loop 1541 1542loop: 1543 %0 = phi i16 [ %.pre, %entry ], [ %load2, %loop ] 1544 %iv = phi i64 [ 0, %entry ], [ %iv2, %loop ] 1545 %i = phi i64 [ 0, %entry ], [ %i1, %loop ] 1546 %conv = 
sext i16 %0 to i32 1547 %i1 = add nuw nsw i64 %i, 1 1548 %iv1 = add nuw nsw i64 %iv, 1 1549 %iv2 = add nuw nsw i64 %iv, 2 1550 %gep1 = getelementptr inbounds i16, ptr %a, i64 %iv1 1551 %load1 = load i16, ptr %gep1, align 4 1552 %conv1 = sext i16 %load1 to i32 1553 %gep2 = getelementptr inbounds i16, ptr %a, i64 %iv2 1554 %load2 = load i16, ptr %gep2, align 4 1555 %conv2 = sext i16 %load2 to i32 1556 %mul01 = mul nsw i32 %conv, %conv1 1557 %mul012 = mul nsw i32 %mul01, %conv2 1558 %arrayidx5 = getelementptr inbounds i32, ptr %b, i64 %i 1559 store i32 %mul012, ptr %arrayidx5 1560 %exitcond = icmp eq i64 %iv, %n 1561 br i1 %exitcond, label %end, label %loop 1562 1563end: 1564 ret void 1565} 1566 1567attributes #0 = { "unsafe-fp-math"="true" } 1568