; Outer-loop vectorization test for the VPlan native path.
;
; The IR below corresponds to the following C source. The outer loop (i1)
; carries the vectorize(enable)/vectorize_width(4) hints, which appear in the
; IR as the !llvm.loop metadata attached to the outer-loop latch branch.
;
; extern int arr[8][8];
; extern int arr2[8];
;
;   void foo(int n)
;   {
;     int i1, i2;
;
;     #pragma clang loop vectorize(enable) vectorize_width(4)
;     for (i1 = 0; i1 < 8; i1++) {
;       arr2[i1] = i1;
;       for (i2 = 0; i2 < 8; i2++)
;         arr[i2][i1] = i1 + n;
;     }
;   }
;
; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path -verify-loop-info -verify-dom-info < %s | FileCheck %s

; Preheader: the scalar argument %n is broadcast into a <4 x i32> splat.
; CHECK-LABEL: vector.ph:
; CHECK: %[[SplatVal:.*]] = insertelement <4 x i32> poison, i32 %n, i64 0
; CHECK: %[[Splat:.*]] = shufflevector <4 x i32> %[[SplatVal]], <4 x i32> poison, <4 x i32> zeroinitializer

; Outer-loop header: scalar and vector induction variables, the scatter store
; to @arr2, and the loop-invariant (w.r.t. the inner loop) value i1 + n.
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
; CHECK: %[[AAddr:.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> %[[VecInd]]
; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[VecIndTr]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true))
; CHECK: %[[VecIndTr2:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
; CHECK: %[[StoreVal:.*]] = add nsw <4 x i32> %[[VecIndTr2]], %[[Splat]]
; CHECK: br label %[[InnerLoop:.+]]

; Inner loop: kept as a loop, with a vector phi and a scatter store to @arr;
; its exit condition is taken from lane 0 of the vector compare.
; CHECK: [[InnerLoop]]:
; CHECK: %[[InnerPhi:.*]] = phi <4 x i64> [ zeroinitializer, %vector.body ], [ %[[InnerPhiNext:.*]], %[[InnerLoop]] ]
; CHECK: %[[AAddr2:.*]] = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, <4 x i64> %[[InnerPhi]], <4 x i64> %[[VecInd]]
; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StoreVal]], <4 x ptr> %[[AAddr2]], i32 4, <4 x i1> splat (i1 true))
; CHECK: %[[InnerPhiNext]] = add nuw nsw <4 x i64> %[[InnerPhi]], splat (i64 1)
; CHECK: %[[VecCond:.*]] = icmp eq <4 x i64> %[[InnerPhiNext]], splat (i64 8)
; CHECK: %[[InnerCond:.*]] = extractelement <4 x i1> %[[VecCond]], i32 0
; CHECK: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]]

; Outer-loop latch: both induction variables advance by the vector width (4).
; CHECK: [[ForInc]]:
; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4
; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4)
; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], 8
; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body

@arr2 = external global [8 x i32], align 16
@arr = external global [8 x [8 x i32]], align 16

; Scalar input: a two-level loop nest with constant trip counts of 8.
; for.body/for.inc8 form the outer loop (header/latch); for.body3 is the
; single-block inner loop.
define void @foo(i32 %n) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc8, %entry
  %indvars.iv21 = phi i64 [ 0, %entry ], [ %indvars.iv.next22, %for.inc8 ]
  %arrayidx = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, i64 %indvars.iv21
  %0 = trunc i64 %indvars.iv21 to i32
  store i32 %0, ptr %arrayidx, align 4
  ; The second trunc duplicates %0; the directives above match two separate
  ; truncs in the vectorized output, so it is kept as-is.
  %1 = trunc i64 %indvars.iv21 to i32
  %add = add nsw i32 %1, %n
  br label %for.body3

for.body3:                                        ; preds = %for.body3, %for.body
  %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ]
  ; Indexes arr[i2][i1]: the inner induction variable selects the row and the
  ; outer induction variable selects the column.
  %arrayidx7 = getelementptr inbounds [8 x [8 x i32]], ptr @arr, i64 0, i64 %indvars.iv, i64 %indvars.iv21
  store i32 %add, ptr %arrayidx7, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 8
  br i1 %exitcond, label %for.inc8, label %for.body3

for.inc8:                                         ; preds = %for.body3
  %indvars.iv.next22 = add nuw nsw i64 %indvars.iv21, 1
  %exitcond23 = icmp eq i64 %indvars.iv.next22, 8
  ; !llvm.loop on the outer-loop latch carries the vectorization hints.
  br i1 %exitcond23, label %for.end10, label %for.body, !llvm.loop !1

for.end10:                                        ; preds = %for.inc8
  ret void
}

; Loop hints for the outer loop: vectorization enabled with a width of 4.
!1 = distinct !{!1, !2, !3}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.enable", i1 true}