; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute -enable-loop-distribute -verify-loop-info -verify-dom-info -S \
; RUN:   < %s | FileCheck %s

; RUN: opt -aa-pipeline=basic-aa -passes='loop-distribute,print<access-info>' -enable-loop-distribute \
; RUN:   -verify-loop-info -verify-dom-info -disable-output < %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS

; RUN: opt -aa-pipeline=basic-aa -passes=loop-distribute,loop-vectorize -enable-loop-distribute -force-vector-width=4 -S \
; RUN:   < %s | FileCheck %s --check-prefix=VECTORIZE

; We should distribute this loop into a safe (2nd statement) and unsafe loop
; (1st statement):
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
;     =======================
;     C[i] = D[i] * E[i];
;   }

; CHECK-LABEL: @f(
define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK: br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK: %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK: br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: %mulC = mul i32 %loadD, %loadE
; CHECK: for.end:


; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop
; ANALYSIS: for.body:
; ANALYSIS-NEXT: Memory dependences are safe{{$}}


; VECTORIZE: mul <4 x i32>

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
  %loadA = load i32, ptr %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
  %loadB = load i32, ptr %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
  store i32 %mulA, ptr %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
  %loadD = load i32, ptr %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
  %loadE = load i32, ptr %arrayidxE, align 4

  %mulC = mul i32 %loadD, %loadE

  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
  store i32 %mulC, ptr %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

declare i32 @llvm.convergent(i32) #0

; It is OK to distribute with a convergent operation, since in each
; new loop the convergent operation has the same control dependency.
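; A C-level sketch of the loop below, mirroring the example above;
; convergent() stands in for this test's @llvm.convergent intrinsic:
;   for (i = 0; i < n; i++) {
;     A[i + 1] = A[i] * B[i];
;     =======================
;     C[i] = convergent(D[i]) * E[i];
;   }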
; CHECK-LABEL: @f_with_convergent(
define void @f_with_convergent(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr noalias %e) {
entry:
  br label %for.body

; Verify the two distributed loops.

; CHECK: entry.split.ldist1:
; CHECK: br label %for.body.ldist1
; CHECK: for.body.ldist1:
; CHECK: %mulA.ldist1 = mul i32 %loadB.ldist1, %loadA.ldist1
; CHECK: br i1 %exitcond.ldist1, label %entry.split, label %for.body.ldist1

; CHECK: entry.split:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: %convergentD = call i32 @llvm.convergent(i32 %loadD)
; CHECK: %mulC = mul i32 %convergentD, %loadE
; CHECK: for.end:


; ANALYSIS: for.body.ldist1:
; ANALYSIS-NEXT: Report: unsafe dependent memory operations in loop
; ANALYSIS: for.body:
; ANALYSIS-NEXT: Has convergent operation in loop
; ANALYSIS-NEXT: Report: cannot add control dependency to convergent operation

; The convergent instruction happens to block vectorization.
; VECTORIZE: call i32 @llvm.convergent
; VECTORIZE: mul i32

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]

  %arrayidxA = getelementptr inbounds i32, ptr %a, i64 %ind
  %loadA = load i32, ptr %arrayidxA, align 4

  %arrayidxB = getelementptr inbounds i32, ptr %b, i64 %ind
  %loadB = load i32, ptr %arrayidxB, align 4

  %mulA = mul i32 %loadB, %loadA

  %add = add nuw nsw i64 %ind, 1
  %arrayidxA_plus_4 = getelementptr inbounds i32, ptr %a, i64 %add
  store i32 %mulA, ptr %arrayidxA_plus_4, align 4

  %arrayidxD = getelementptr inbounds i32, ptr %d, i64 %ind
  %loadD = load i32, ptr %arrayidxD, align 4

  %arrayidxE = getelementptr inbounds i32, ptr %e, i64 %ind
  %loadE = load i32, ptr %arrayidxE, align 4

  %convergentD = call i32 @llvm.convergent(i32 %loadD)
  %mulC = mul i32 %convergentD, %loadE

  %arrayidxC = getelementptr inbounds i32, ptr %c, i64 %ind
  store i32 %mulC, ptr %arrayidxC, align 4

  %exitcond = icmp eq i64 %add, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

attributes #0 = { nounwind readnone convergent }