; Test for the pgo-memop-opt pass: size specialization of memcpy, memcmp and
; bcmp calls driven by value-profile ("VP") metadata.
;
; Every profiled call below carries a VP record with total count 556 in which
; size 0 has count 99. With the thresholds passed on the command lines
; (count threshold 90, percent threshold 15) the checks expect exactly one
; specialized version per call, for size 0: a switch on the size operand, a
; constant-size call in the case block, the original call in the default
; block, and a merge block (with a phi when the call result is used).
;
; The first invocation checks the transformed IR only. The second one also
; writes optimization remarks to a YAML file, which the third command checks.
;
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 --pgo-memop-optimize-memcmp-bcmp -S | FileCheck %s --check-prefix=MEMOP_OPT
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Doubly nested loop whose inner body issues two memcpy intrinsic calls that
; share the size operand %conv. Each call has its own VP record (!30, !31).
define void @foo(ptr %dst, ptr %src, ptr %dst2, ptr %src2, ptr %a, i32 %n) !prof !27 {
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end6, !prof !28

for.body:
  br label %for.cond1

for.cond1:
  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
  %idx.ext = sext i32 %i.0 to i64
  %add.ptr = getelementptr inbounds i32, ptr %a, i64 %idx.ext
  %0 = load i32, ptr %add.ptr, align 4
  %cmp2 = icmp slt i32 %j.0, %0
  br i1 %cmp2, label %for.body3, label %for.end, !prof !29

for.body3:
  %add = add nsw i32 %i.0, 1
  %conv = sext i32 %add to i64
  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %conv, i1 false), !prof !30
  call void @llvm.memcpy.p0.p0.i64(ptr %dst2, ptr %src2, i64 %conv, i1 false), !prof !31
  br label %for.inc

; Expected shape for the first memcpy, then the same shape for the second one.
; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL:.*]] [
; MEMOP_OPT: i64 0, label %[[CASE_1_LABEL:.*]]
; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]]
; MEMOP_OPT: [[CASE_1_LABEL]]:
; MEMOP_OPT: call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 0, i1 false)
; MEMOP_OPT: br label %[[MERGE_LABEL:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL]]:
; MEMOP_OPT: call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %conv, i1 false), !prof [[NEWVP:![0-9]+]]
; MEMOP_OPT: br label %[[MERGE_LABEL]]
; MEMOP_OPT: [[MERGE_LABEL]]:
; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [
; MEMOP_OPT: i64 0, label %[[CASE_1_LABEL2:.*]]
; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]]
; MEMOP_OPT: [[CASE_1_LABEL2]]:
; MEMOP_OPT: call void @llvm.memcpy.p0.p0.i64(ptr %dst2, ptr %src2, i64 0, i1 false)
; MEMOP_OPT: br label %[[MERGE_LABEL2:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL2]]:
; MEMOP_OPT: call void @llvm.memcpy.p0.p0.i64(ptr %dst2, ptr %src2, i64 %conv, i1 false), !prof [[NEWVP]]
; MEMOP_OPT: br label %[[MERGE_LABEL2]]
; MEMOP_OPT: [[MERGE_LABEL2]]:
; MEMOP_OPT: br label %for.inc

for.inc:
  %inc = add nsw i32 %j.0, 1
  br label %for.cond1

for.end:
  br label %for.inc4

for.inc4:
  %inc5 = add nsw i32 %i.0, 1
  br label %for.cond

for.end6:
  ret void
}

; Sink for the memcmp/bcmp results so that both return values have a use.
declare void @consume(i32 %v1, i32 %v2)

; Same loop nest as @foo, but the profiled calls are memcmp and bcmp. Their
; results feed @consume, so the checks also expect a phi in each merge block.
define void @foo_memcmp_bcmp(ptr %dst, ptr %src, ptr %dst2, ptr %src2, ptr %a, i32 %n) !prof !27 {
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end6, !prof !28

for.body:
  br label %for.cond1

for.cond1:
  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
  %idx.ext = sext i32 %i.0 to i64
  %add.ptr = getelementptr inbounds i32, ptr %a, i64 %idx.ext
  %0 = load i32, ptr %add.ptr, align 4
  %cmp2 = icmp slt i32 %j.0, %0
  br i1 %cmp2, label %for.body3, label %for.end, !prof !29

for.body3:
  %add = add nsw i32 %i.0, 1
  %conv = sext i32 %add to i64
  %memcmp = call i32 @memcmp(ptr %dst, ptr %src, i64 %conv), !prof !30
  %bcmp = call i32 @bcmp(ptr %dst2, ptr %src2, i64 %conv), !prof !31
  call void @consume(i32 %memcmp, i32 %bcmp)
  br label %for.inc

; Expected shape for memcmp, then bcmp, then the call consuming both phis.
; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL:.*]] [
; MEMOP_OPT: i64 0, label %[[CASE_1_LABEL:.*]]
; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]]
; MEMOP_OPT: [[CASE_1_LABEL]]:
; MEMOP_OPT: %[[RV:.*]] = call i32 @memcmp(ptr %dst, ptr %src, i64 0)
; MEMOP_OPT: br label %[[MERGE_LABEL:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL]]:
; MEMOP_OPT: %[[RVD:.*]] = call i32 @memcmp(ptr %dst, ptr %src, i64 %conv), !prof [[NEWVP:![0-9]+]]
; MEMOP_OPT: br label %[[MERGE_LABEL]]
; MEMOP_OPT: [[MERGE_LABEL]]:
; MEMOP_OPT: %[[PHI:.*]] = phi i32 [ %[[RVD]], %[[DEFAULT_LABEL]] ], [ %[[RV]], %[[CASE_1_LABEL]] ]
; MEMOP_OPT: switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [
; MEMOP_OPT: i64 0, label %[[CASE_1_LABEL2:.*]]
; MEMOP_OPT: ], !prof [[SWITCH_BW:![0-9]+]]
; MEMOP_OPT: [[CASE_1_LABEL2]]:
; MEMOP_OPT: %[[RV2:.*]] = call i32 @bcmp(ptr %dst2, ptr %src2, i64 0)
; MEMOP_OPT: br label %[[MERGE_LABEL2:.*]]
; MEMOP_OPT: [[DEFAULT_LABEL2]]:
; MEMOP_OPT: %[[RVD2:.*]] = call i32 @bcmp(ptr %dst2, ptr %src2, i64 %conv), !prof [[NEWVP]]
; MEMOP_OPT: br label %[[MERGE_LABEL2]]
; MEMOP_OPT: [[MERGE_LABEL2]]:
; MEMOP_OPT: %[[PHI2:.*]] = phi i32 [ %[[RVD2]], %[[DEFAULT_LABEL2]] ], [ %[[RV2]], %[[CASE_1_LABEL2]] ]
; MEMOP_OPT: call void @consume(i32 %[[PHI]], i32 %[[PHI2]])
; MEMOP_OPT: br label %for.inc

for.inc:
  %inc = add nsw i32 %j.0, 1
  br label %for.cond1

for.end:
  br label %for.inc4

for.inc4:
  %inc5 = add nsw i32 %i.0, 1
  br label %for.cond

for.end6:
  ret void
}

; Switch weights: 457 for the default destination, 99 for the size-0 case.
; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99}
; Should be 457 total left (original total count 556, minus 99 from specialized
; value 0, which is removed from VP array). This should preserve all unpromoted values.
; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66, i64 5, i64 55, i64 6, i64 44, i64 7, i64 33, i64 8, i64 22}

!llvm.module.flags = !{!0}

; Profile summary attached to the module.
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 579}
!4 = !{!"MaxCount", i64 556}
!5 = !{!"MaxInternalCount", i64 20}
!6 = !{!"MaxFunctionCount", i64 556}
!7 = !{!"NumCounts", i64 6}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13, !14, !15, !16, !16, !17, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26}
!11 = !{i32 10000, i64 556, i32 1}
!12 = !{i32 100000, i64 556, i32 1}
!13 = !{i32 200000, i64 556, i32 1}
!14 = !{i32 300000, i64 556, i32 1}
!15 = !{i32 400000, i64 556, i32 1}
!16 = !{i32 500000, i64 556, i32 1}
!17 = !{i32 600000, i64 556, i32 1}
!18 = !{i32 700000, i64 556, i32 1}
!19 = !{i32 800000, i64 556, i32 1}
!20 = !{i32 900000, i64 556, i32 1}
!21 = !{i32 950000, i64 556, i32 1}
!22 = !{i32 990000, i64 20, i32 2}
!23 = !{i32 999000, i64 1, i32 5}
!24 = !{i32 999900, i64 1, i32 5}
!25 = !{i32 999990, i64 1, i32 5}
!26 = !{i32 999999, i64 1, i32 5}
; Function entry count and loop branch weights shared by both functions.
!27 = !{!"function_entry_count", i64 1}
!28 = !{!"branch_weights", i32 20, i32 1}
!29 = !{!"branch_weights", i32 556, i32 20}
; Value profiles of the size operand: total 556, then (size, count) pairs.
; Size 0 leads with count 99; the next entry is size 2 with count 88.
!30 = !{!"VP", i32 1, i64 556, i64 0, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66, i64 5, i64 55, i64 6, i64 44, i64 7, i64 33, i64 8, i64 22}
!31 = !{!"VP", i32 1, i64 556, i64 0, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66, i64 5, i64 55, i64 6, i64 44, i64 7, i64 33, i64 8, i64 22}

declare void @llvm.lifetime.start(i64, ptr nocapture)

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1)

declare i32 @memcmp(ptr, ptr, i64)
declare i32 @bcmp(ptr, ptr, i64)

declare void @llvm.lifetime.end(i64, ptr nocapture)

; Expected optimization remarks, one per specialized call, in emission order:
; two memcpy remarks for @foo, then memcmp and bcmp for @foo_memcmp_bcmp.
; YAML: --- !Passed
; YAML-NEXT: Pass: pgo-memop-opt
; YAML-NEXT: Name: memopt-opt
; YAML-NEXT: Function: foo
; YAML-NEXT: Hotness: 0
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'optimized '
; YAML-NEXT: - Memop: memcpy
; YAML-NEXT: - String: ' with count '
; YAML-NEXT: - Count: '99'
; YAML-NEXT: - String: ' out of '
; YAML-NEXT: - Total: '556'
; YAML-NEXT: - String: ' for '
; YAML-NEXT: - Versions: '1'
; YAML-NEXT: - String: ' versions'
; YAML-NEXT: ...
; YAML-NEXT: --- !Passed
; YAML-NEXT: Pass: pgo-memop-opt
; YAML-NEXT: Name: memopt-opt
; YAML-NEXT: Function: foo
; YAML-NEXT: Hotness: 0
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'optimized '
; YAML-NEXT: - Memop: memcpy
; YAML-NEXT: - String: ' with count '
; YAML-NEXT: - Count: '99'
; YAML-NEXT: - String: ' out of '
; YAML-NEXT: - Total: '556'
; YAML-NEXT: - String: ' for '
; YAML-NEXT: - Versions: '1'
; YAML-NEXT: - String: ' versions'
; YAML-NEXT: ...
; YAML-NEXT: --- !Passed
; YAML-NEXT: Pass: pgo-memop-opt
; YAML-NEXT: Name: memopt-opt
; YAML-NEXT: Function: foo_memcmp_bcmp
; YAML-NEXT: Hotness: 0
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'optimized '
; YAML-NEXT: - Memop: memcmp
; YAML-NEXT: - String: ' with count '
; YAML-NEXT: - Count: '99'
; YAML-NEXT: - String: ' out of '
; YAML-NEXT: - Total: '556'
; YAML-NEXT: - String: ' for '
; YAML-NEXT: - Versions: '1'
; YAML-NEXT: - String: ' versions'
; YAML-NEXT: ...
; YAML-NEXT: --- !Passed
; YAML-NEXT: Pass: pgo-memop-opt
; YAML-NEXT: Name: memopt-opt
; YAML-NEXT: Function: foo_memcmp_bcmp
; YAML-NEXT: Hotness: 0
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'optimized '
; YAML-NEXT: - Memop: bcmp
; YAML-NEXT: - String: ' with count '
; YAML-NEXT: - Count: '99'
; YAML-NEXT: - String: ' out of '
; YAML-NEXT: - Total: '556'
; YAML-NEXT: - String: ' for '
; YAML-NEXT: - Versions: '1'
; YAML-NEXT: - String: ' versions'
; YAML-NEXT: ...