; RUN: opt -passes=loop-simplify,loop-distribute -enable-loop-distribute -S < %s 2>&1 \
; RUN:   | FileCheck %s --check-prefix=ALWAYS --check-prefix=NO_REMARKS
; RUN: opt -passes=loop-simplify,loop-distribute -enable-loop-distribute -S \
; RUN:   -pass-remarks-missed=loop-distribute < %s 2>&1 \
; RUN:   | FileCheck %s --check-prefix=ALWAYS --check-prefix=MISSED_REMARKS
; RUN: opt -passes=loop-simplify,loop-distribute -enable-loop-distribute -S \
; RUN:   -pass-remarks-analysis=loop-distribute < %s 2>&1 \
; RUN:   | FileCheck %s --check-prefix=ALWAYS --check-prefix=ANALYSIS_REMARKS
; RUN: opt -passes=loop-simplify,loop-distribute -enable-loop-distribute -S \
; RUN:   -pass-remarks=loop-distribute < %s 2>&1 \
; RUN:   | FileCheck %s --check-prefix=ALWAYS --check-prefix=REMARKS

; Purpose: checks the remarks and warnings printed while running loop-simplify
; followed by loop-distribute on the four loops below. The four invocations
; above differ only in the remark option they pass (none, -pass-remarks-missed,
; -pass-remarks-analysis, -pass-remarks). Each invocation verifies the checks
; under the prefix shared by all four plus the checks under its own prefix.
; stderr is merged into stdout (2>&1), so diagnostics and the printed IR are
; matched from the same stream.

; This is the input program:
;
;      1  void forced (char *A, char *B, char *C, int N) {
;      2  #pragma clang loop distribute(enable)
;      3    for(int i = 0; i < N; i++) {
;      4      A[i] = B[i] * C[i];
;      5    }
;      6  }
;      7
;      8  void not_forced (char *A, char *B, char *C, int N) {
;      9    for(int i = 0; i < N; i++) {
;     10      A[i] = B[i] * C[i];
;     11    }
;     12  }
;     13
;     14  void success (char *A, char *B, char *C, char *D, char *E, int N) {
;     15    for(int i = 0; i < N; i++) {
;     16      A[i + 1] = A[i] + B[i];
;     17      C[i] = D[i] * E[i];
;     18    }
;     19  }
;
; NOTE(review): the listing above has no counterpart for @convergent; that
; function's debug locations refer to lines 24-29 of /tmp/t.c.

; @forced: the loop's back-edge branch carries !llvm.loop !20, i.e.
; llvm.loop.distribute.enable = true. The checks below expect the
; "safe for vectorization" remark and the "failed explicitly specified loop
; distribution" warning under every invocation, and the generic missed remark
; only when -pass-remarks-missed is given. All are expected at 3:3, which is
; the location (!10) attached to the back-edge branch.

; MISSED_REMARKS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info
; ALWAYS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization
; ALWAYS: warning: /tmp/t.c:3:3: loop not distributed: failed explicitly specified loop distribution

define void @forced(ptr %A, ptr %B, ptr %C, i32 %N) !dbg !7 {
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp12 = icmp sgt i32 %N, 0, !dbg !9
  br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !10

ph:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
  ; A[i] = B[i] * C[i]
  %arrayidx = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !12
  %0 = load i8, ptr %arrayidx, align 1, !dbg !12, !tbaa !13
  %arrayidx2 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !16
  %1 = load i8, ptr %arrayidx2, align 1, !dbg !16, !tbaa !13
  %mul = mul i8 %1, %0, !dbg !17
  %arrayidx6 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !18
  store i8 %mul, ptr %arrayidx6, align 1, !dbg !19, !tbaa !13
  ; Increment the 64-bit induction variable and compare its truncation to N.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
  %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
  ; The !llvm.loop attachment is what marks this loop as explicitly requested.
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20

for.cond.cleanup:
  ret void, !dbg !11
}

; @not_forced: same loop body as @forced but without any !llvm.loop metadata.
; The checks below expect no analysis remark when no remark option is given,
; the generic missed remark with -pass-remarks-missed, the analysis remark with
; -pass-remarks-analysis, and no "failed explicitly specified" warning under
; any invocation. Locations are 9:3, matching !24 on the back-edge branch.

; NO_REMARKS-NOT: remark: /tmp/t.c:9:3: loop not distributed: memory operations are safe for vectorization
; MISSED_REMARKS: remark: /tmp/t.c:9:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info
; ANALYSIS_REMARKS: remark: /tmp/t.c:9:3: loop not distributed: memory operations are safe for vectorization
; ALWAYS-NOT: warning: /tmp/t.c:9:3: loop not distributed: failed explicitly specified loop distribution

define void @not_forced(ptr %A, ptr %B, ptr %C, i32 %N) !dbg !22 {
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp12 = icmp sgt i32 %N, 0, !dbg !23
  br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !24

ph:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
  ; A[i] = B[i] * C[i]
  %arrayidx = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !26
  %0 = load i8, ptr %arrayidx, align 1, !dbg !26, !tbaa !13
  %arrayidx2 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !27
  %1 = load i8, ptr %arrayidx2, align 1, !dbg !27, !tbaa !13
  %mul = mul i8 %1, %0, !dbg !28
  %arrayidx6 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !29
  store i8 %mul, ptr %arrayidx6, align 1, !dbg !30, !tbaa !13
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !24
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !24
  %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !24
  ; No !llvm.loop attachment here, unlike @forced.
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !24

for.cond.cleanup:
  ret void, !dbg !25
}

; @success: the loop contains two groups of memory operations. The first,
; A[i + 1] = A[i] + B[i], reads the element of A written by the previous
; iteration; the second, C[i] = D[i] * E[i], touches different pointers.
; With -pass-remarks the check below expects a "distributed loop" remark at
; 15:3, matching !33 on the back-edge branch.

; REMARKS: remark: /tmp/t.c:15:3: distributed loop

define void @success(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E, i32 %N) !dbg !31 {
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp28 = icmp sgt i32 %N, 0, !dbg !32
  br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !33

ph:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
  ; A[i + 1] = A[i] + B[i]
  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !35
  %0 = load i8, ptr %arrayidx, align 1, !dbg !35, !tbaa !13
  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !36
  %1 = load i8, ptr %arrayidx2, align 1, !dbg !36, !tbaa !13
  %add = add i8 %1, %0, !dbg !37
  ; The incremented induction variable doubles as the i + 1 store index.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !33
  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !38
  store i8 %add, ptr %arrayidx7, align 1, !dbg !39, !tbaa !13
  ; C[i] = D[i] * E[i]
  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !40
  %2 = load i8, ptr %arrayidx9, align 1, !dbg !40, !tbaa !13
  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !41
  %3 = load i8, ptr %arrayidx12, align 1, !dbg !41, !tbaa !13
  %mul = mul i8 %3, %2, !dbg !42
  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !43
  store i8 %mul, ptr %arrayidx16, align 1, !dbg !44, !tbaa !13
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !33
  %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !33
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !33

for.cond.cleanup:
  ret void, !dbg !34
}

; @convergent: same memory operations as @success, plus a call to
; @llvm.convergent in the loop body. The function carries attribute group #1
; (convergent) and the back-edge branch carries !llvm.loop !20 (distribution
; explicitly enabled). The checks below expect the loop not to be distributed,
; with the analysis message naming the convergent operation and the
; "failed explicitly specified" warning under every invocation. Locations are
; 27:5, matching !57 on the back-edge branch.

; MISSED_REMARKS: /tmp/t.c:27:5: loop not distributed: use -Rpass-analysis=loop-distribute for more info
; ANALYSIS_REMARKS: /tmp/t.c:27:5: loop not distributed: may not insert runtime check with convergent operation
; ALWAYS: warning: /tmp/t.c:27:5: loop not distributed: failed explicitly specified loop distribution
define void @convergent(ptr %A, ptr %B, ptr %C, ptr %D, ptr %E, i32 %N) #1 !dbg !45 {
entry:
  ; Skip the loop entirely when N <= 0.
  %cmp28 = icmp sgt i32 %N, 0, !dbg !46
  br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !47

ph:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
  ; A[i + 1] = A[i] + B[i]
  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %indvars.iv, !dbg !49
  %0 = load i8, ptr %arrayidx, align 1, !dbg !49, !tbaa !13
  %arrayidx2 = getelementptr inbounds i8, ptr %B, i64 %indvars.iv, !dbg !50
  %1 = load i8, ptr %arrayidx2, align 1, !dbg !50, !tbaa !13
  %add = add i8 %1, %0, !dbg !51
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !57
  %arrayidx7 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv.next, !dbg !52
  store i8 %add, ptr %arrayidx7, align 1, !dbg !53, !tbaa !13
  ; C[i] = D[i] * E[i]
  %arrayidx9 = getelementptr inbounds i8, ptr %D, i64 %indvars.iv, !dbg !54
  %2 = load i8, ptr %arrayidx9, align 1, !dbg !54, !tbaa !13
  %arrayidx12 = getelementptr inbounds i8, ptr %E, i64 %indvars.iv, !dbg !55
  %3 = load i8, ptr %arrayidx12, align 1, !dbg !55, !tbaa !13
  %mul = mul i8 %3, %2, !dbg !56
  %arrayidx16 = getelementptr inbounds i8, ptr %C, i64 %indvars.iv, !dbg !57
  store i8 %mul, ptr %arrayidx16, align 1, !dbg !58, !tbaa !13
  ; The only instruction that distinguishes this loop body from @success.
  call void @llvm.convergent()
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !57
  %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !57
  br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !20, !dbg !57

for.cond.cleanup:
  ; NOTE(review): this uses !58 (27:10) while !48 (29:1) is otherwise
  ; unreferenced; the other functions return at their closing-brace location.
  ; Possibly a copy/paste slip; it does not affect the 27:5 checks. Confirm
  ; before changing.
  ret void, !dbg !58
}


; Declared only; called from the loop in @convergent.
declare void @llvm.convergent() #0

attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind convergent }


!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

; Compile unit, file and module flags.
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
!4 = !{i32 2, !"Debug Info Version", i32 3}
; Debug locations for @forced (source lines 1-6).
!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !2)
!9 = !DILocation(line: 3, column: 20, scope: !7)
!10 = !DILocation(line: 3, column: 3, scope: !7)
!11 = !DILocation(line: 6, column: 1, scope: !7)
!12 = !DILocation(line: 4, column: 12, scope: !7)
; TBAA access tag shared by every load and store in this file.
!13 = !{!14, !14, i64 0}
!14 = !{!"omnipotent char", !15, i64 0}
!15 = !{!"Simple C/C++ TBAA"}
!16 = !DILocation(line: 4, column: 19, scope: !7)
!17 = !DILocation(line: 4, column: 17, scope: !7)
!18 = !DILocation(line: 4, column: 5, scope: !7)
!19 = !DILocation(line: 4, column: 10, scope: !7)
; Loop ID that explicitly enables distribution; attached to the back-edge
; branches of @forced and @convergent.
!20 = distinct !{!20, !21}
!21 = !{!"llvm.loop.distribute.enable", i1 true}
; Debug locations for @not_forced (source lines 8-12).
!22 = distinct !DISubprogram(name: "not_forced", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!23 = !DILocation(line: 9, column: 20, scope: !22)
!24 = !DILocation(line: 9, column: 3, scope: !22)
!25 = !DILocation(line: 12, column: 1, scope: !22)
!26 = !DILocation(line: 10, column: 12, scope: !22)
!27 = !DILocation(line: 10, column: 19, scope: !22)
!28 = !DILocation(line: 10, column: 17, scope: !22)
!29 = !DILocation(line: 10, column: 5, scope: !22)
!30 = !DILocation(line: 10, column: 10, scope: !22)
; Debug locations for @success (source lines 14-19).
!31 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 14, type: !8, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!32 = !DILocation(line: 15, column: 20, scope: !31)
!33 = !DILocation(line: 15, column: 3, scope: !31)
!34 = !DILocation(line: 19, column: 1, scope: !31)
!35 = !DILocation(line: 16, column: 16, scope: !31)
!36 = !DILocation(line: 16, column: 23, scope: !31)
!37 = !DILocation(line: 16, column: 21, scope: !31)
!38 = !DILocation(line: 16, column: 5, scope: !31)
!39 = !DILocation(line: 16, column: 14, scope: !31)
!40 = !DILocation(line: 17, column: 12, scope: !31)
!41 = !DILocation(line: 17, column: 19, scope: !31)
!42 = !DILocation(line: 17, column: 17, scope: !31)
!43 = !DILocation(line: 17, column: 5, scope: !31)
!44 = !DILocation(line: 17, column: 10, scope: !31)
; Debug locations for @convergent (source lines 24-29).
!45 = distinct !DISubprogram(name: "convergent", scope: !1, file: !1, line: 24, type: !8, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!46 = !DILocation(line: 25, column: 20, scope: !45)
!47 = !DILocation(line: 25, column: 3, scope: !45)
; NOTE(review): !48 is not referenced by any instruction in this file.
!48 = !DILocation(line: 29, column: 1, scope: !45)
!49 = !DILocation(line: 26, column: 16, scope: !45)
!50 = !DILocation(line: 26, column: 23, scope: !45)
!51 = !DILocation(line: 26, column: 21, scope: !45)
!52 = !DILocation(line: 26, column: 5, scope: !45)
!53 = !DILocation(line: 26, column: 14, scope: !45)
!54 = !DILocation(line: 27, column: 12, scope: !45)
!55 = !DILocation(line: 27, column: 19, scope: !45)
!56 = !DILocation(line: 27, column: 17, scope: !45)
!57 = !DILocation(line: 27, column: 5, scope: !45)
!58 = !DILocation(line: 27, column: 10, scope: !45)