; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=mergeicmps -verify-dom-info -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86

; Each function below compares the four i32 fields of two %S values through a
; chain of icmp-eq blocks and places a call to @foo in the entry block. The
; checks pin how much of the chain is turned into a single memcmp and where
; the call ends up.

%S = type { i32, i32, i32, i32 }

declare void @foo(...)

; We can split %entry and create a memcmp(16 bytes).
define zeroext i1 @opeq1(
; X86-LABEL: @opeq1(
; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    call void (...) @foo() #[[ATTR2:[0-9]+]]
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A:%.*]], ptr [[B:%.*]], i64 16)
; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
; X86:       opeq1.exit:
; X86-NEXT:    ret i1 [[TMP2]]
;
; Make sure this call is moved to the beginning of the entry block.
  ptr nocapture readonly dereferenceable(16) %a,
  ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  %0 = load i32, ptr %a, align 4
  %1 = load i32, ptr %b, align 4
  ; Does other work.
  call void (...) @foo() inaccessiblememonly
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
  %2 = load i32, ptr %second.i, align 4
  %second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
  %3 = load i32, ptr %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
  %4 = load i32, ptr %third.i, align 4
  %third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
  %5 = load i32, ptr %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
  %6 = load i32, ptr %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
  %7 = load i32, ptr %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}


; We will not be able to merge anything, make sure the call is not moved out.
define zeroext i1 @opeq1_discontiguous(
; X86-LABEL: @opeq1_discontiguous(
; X86-NEXT:  entry:
; X86-NEXT:    [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], ptr [[A:%.*]], i64 0, i32 1
; X86-NEXT:    [[TMP0:%.*]] = load i32, ptr [[FIRST_I]], align 4
; X86-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
; X86-NEXT:    call void (...) @foo() #[[ATTR2]]
; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
; X86-NEXT:    br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]]
; X86:       land.rhs.i:
; X86-NEXT:    [[SECOND_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2
; X86-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SECOND_I]], align 4
; X86-NEXT:    [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 1
; X86-NEXT:    [[TMP3:%.*]] = load i32, ptr [[SECOND2_I]], align 4
; X86-NEXT:    [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
; X86-NEXT:    br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]]
; X86:       land.rhs.i.2:
; X86-NEXT:    [[THIRD_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2
; X86-NEXT:    [[TMP4:%.*]] = load i32, ptr [[THIRD_I]], align 4
; X86-NEXT:    [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3
; X86-NEXT:    [[TMP5:%.*]] = load i32, ptr [[THIRD2_I]], align 4
; X86-NEXT:    [[CMP3_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
; X86-NEXT:    br i1 [[CMP3_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT]]
; X86:       land.rhs.i.3:
; X86-NEXT:    [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 1
; X86-NEXT:    [[TMP6:%.*]] = load i32, ptr [[FOURTH_I]], align 4
; X86-NEXT:    [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3
; X86-NEXT:    [[TMP7:%.*]] = load i32, ptr [[FOURTH2_I]], align 4
; X86-NEXT:    [[CMP4_I:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]]
; X86-NEXT:    br label [[OPEQ1_EXIT]]
; X86:       opeq1.exit:
; X86-NEXT:    [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ false, [[LAND_RHS_I_2]] ], [ [[CMP4_I]], [[LAND_RHS_I_3]] ]
; X86-NEXT:    ret i1 [[TMP8]]
;
; Make sure this call stays where it is in the entry block: the field offsets
; compared on %a and %b do not line up, so no blocks are merged.
  ptr nocapture readonly dereferenceable(16) %a,
  ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  %first.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
  %0 = load i32, ptr %first.i, align 4
  %1 = load i32, ptr %b, align 4
  ; Does other work.
  call void (...) @foo() inaccessiblememonly
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
  %2 = load i32, ptr %second.i, align 4
  %second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
  %3 = load i32, ptr %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
  %4 = load i32, ptr %third.i, align 4
  %third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
  %5 = load i32, ptr %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
  %6 = load i32, ptr %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
  %7 = load i32, ptr %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}

; The call happens before the loads, so it cannot clobber them.
define zeroext i1 @opeq1_call_before_loads(
; X86-LABEL: @opeq1_call_before_loads(
; X86-NEXT:  "entry+land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    call void (...) @foo()
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A:%.*]], ptr [[B:%.*]], i64 16)
; X86-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT:%.*]]
; X86:       opeq1.exit:
; X86-NEXT:    ret i1 [[TMP2]]
;
; Make sure this call remains at the beginning of the merged block, ahead of
; the 16-byte memcmp.
  ptr nocapture readonly dereferenceable(16) %a,
  ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  call void (...) @foo()
  %0 = load i32, ptr %a, align 4
  %1 = load i32, ptr %b, align 4
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
  %2 = load i32, ptr %second.i, align 4
  %second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
  %3 = load i32, ptr %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
  %4 = load i32, ptr %third.i, align 4
  %third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
  %5 = load i32, ptr %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
  %6 = load i32, ptr %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
  %7 = load i32, ptr %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}

; Call happens after the loads, and may clobber them.
define zeroext i1 @opeq1_call_after_loads(
; X86-LABEL: @opeq1_call_after_loads(
; X86-NEXT:  entry:
; X86-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4
; X86-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
; X86-NEXT:    call void (...) @foo()
; X86-NEXT:    [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]
; X86-NEXT:    br i1 [[CMP_I]], label %"land.rhs.i+land.rhs.i.2+land.rhs.i.3", label [[OPEQ1_EXIT:%.*]]
; X86:       "land.rhs.i+land.rhs.i.2+land.rhs.i.3":
; X86-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[S:%.*]], ptr [[A]], i64 0, i32 1
; X86-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 1
; X86-NEXT:    [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[TMP2]], ptr [[TMP3]], i64 12)
; X86-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[MEMCMP]], 0
; X86-NEXT:    br label [[OPEQ1_EXIT]]
; X86:       opeq1.exit:
; X86-NEXT:    [[TMP5:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TMP4]], %"land.rhs.i+land.rhs.i.2+land.rhs.i.3" ]
; X86-NEXT:    ret i1 [[TMP5]]
;
; Make sure this call is NOT moved: the entry block is left untouched and only
; the three following blocks are merged into a memcmp(12 bytes).
  ptr nocapture readonly dereferenceable(16) %a,
  ptr nocapture readonly dereferenceable(16) %b) local_unnamed_addr nofree nosync {
entry:
  %0 = load i32, ptr %a, align 4
  %1 = load i32, ptr %b, align 4
  call void (...) @foo()
  %cmp.i = icmp eq i32 %0, %1
  br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit

land.rhs.i:
  %second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 1
  %2 = load i32, ptr %second.i, align 4
  %second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 1
  %3 = load i32, ptr %second2.i, align 4
  %cmp2.i = icmp eq i32 %2, %3
  br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit

land.rhs.i.2:
  %third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2
  %4 = load i32, ptr %third.i, align 4
  %third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2
  %5 = load i32, ptr %third2.i, align 4
  %cmp3.i = icmp eq i32 %4, %5
  br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit

land.rhs.i.3:
  %fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3
  %6 = load i32, ptr %fourth.i, align 4
  %fourth2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3
  %7 = load i32, ptr %fourth2.i, align 4
  %cmp4.i = icmp eq i32 %6, %7
  br label %opeq1.exit

opeq1.exit:
  %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ]
  ret i1 %8
}