; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s

; The conditional store writes to the same pointer as the unconditional store
; in the entry block. The expected output replaces the branch with a select
; feeding a single store, and the select carries the branch's !prof metadata.
define void @ifconvertstore(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @ifconvertstore(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[C:%.*]], i32 [[B]], !prof [[PROF0:![0-9]+]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[A]], align 4
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end, !prof !0

; Make sure we speculate stores like the following one. It is cheap compared to
; a mispredicted branch.
if.then:
  store i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}

; Store to a different location.

define void @noifconvertstore1(ptr %A1, ptr %A2, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A1:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A2:%.*]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  store i32 %B, ptr %A1
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A2
  br label %ret.end

ret.end:
  ret void
}

; This function could store to our address, so we can't repeat the first store a second time.
declare void @unknown_fun()

define void @noifconvertstore2(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    call void @unknown_fun()
; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  call void @unknown_fun()
  %cmp6 = icmp sgt i32 %D, 42
  br i1 %cmp6, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}

; Make sure we don't speculate volatile stores.

define void @noifconvertstore_volatile(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP6]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store volatile i32 [[C:%.*]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
; First store to the location.
  store i32 %B, ptr %A
  %cmp6 = icmp sgt i32 %D, 42
  br i1 %cmp6, label %if.then, label %ret.end

if.then:
  store volatile i32 %C, ptr %A
  br label %ret.end

ret.end:
  ret void
}


;; Speculate a store, preceded by a local, non-escaping load
define i32 @load_before_store_noescape(i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[TMP0]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}

;; Don't speculate a store, preceded by a local, escaping load
define i32 @load_before_store_escape(i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_escape(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    call void @fork_some_threads(ptr [[A]])
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    call void @join_some_threads()
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  call void @fork_some_threads(ptr %a)
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  call void @join_some_threads()
  ret i32 %add
}

;; Speculate a store into a byval argument, preceded by a load from it
define i64 @load_before_store_noescape_byval(ptr byval([2 x i32]) %a, i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape_byval(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

declare noalias ptr @malloc(i64 %size)

;; Speculate a store into memory returned by a noalias malloc call, preceded by
;; a load from it. The pointer is not passed to any other call.
define i64 @load_before_store_noescape_malloc(i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape_malloc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = call ptr @malloc(i64 8)
; CHECK-NEXT:    store i64 -1, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  %a = call ptr @malloc(i64 8)
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Speculate a store through a noalias writable dereferenceable(8) argument,
;; preceded by a load from it
define i64 @load_before_store_noescape_writable(ptr noalias writable dereferenceable(8) %a, i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape_writable(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[B]], i32 [[V]]
; CHECK-NEXT:    store i32 [[SPEC_STORE_SELECT]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Same body as @load_before_store_noescape_writable, but the argument lacks
;; noalias: the expected output keeps the branch.
define i64 @load_before_store_noescape_writable_missing_noalias(ptr writable dereferenceable(8) %a, i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape_writable_missing_noalias(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

;; Same body as @load_before_store_noescape_writable, but the argument lacks
;; dereferenceable(8): the expected output keeps the branch.
define i64 @load_before_store_noescape_writable_missing_derefable(ptr noalias writable %a, i64 %i, i32 %b) {
; CHECK-LABEL: @load_before_store_noescape_writable_missing_derefable(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i64 -1, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[V]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[V2]]
;
entry:
  store i64 -1, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %v = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %v, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  br label %if.end

if.end:
  %v2 = load i64, ptr %a, align 8
  ret i64 %v2
}

declare void @fork_some_threads(ptr);
declare void @join_some_threads();

; Don't speculate if it's not the only instruction in the block (not counting
; the terminator)
define i32 @not_alone_in_block(i64 %i, i32 %b) {
; CHECK-LABEL: @not_alone_in_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca [2 x i32], align 8
; CHECK-NEXT:    store i64 4294967296, ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 [[I:%.*]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[B:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[B]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    store i32 [[B]], ptr [[A]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i64 0, i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret i32 [[ADD]]
;
entry:
  %a = alloca [2 x i32], align 8
  store i64 4294967296, ptr %a, align 8
  %arrayidx = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, %b
  br i1 %cmp, label %if.then, label %if.end

if.then:
  store i32 %b, ptr %arrayidx, align 4
  store i32 %b, ptr %a, align 4
  br label %if.end

if.end:
  %1 = load i32, ptr %a, align 4
  %arrayidx2 = getelementptr inbounds [2 x i32], ptr %a, i64 0, i64 1
  %2 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %2
  ret i32 %add
}

; The conditional store asks for align 8 while the preceding store to the same
; pointer only has align 4; the expected output keeps the branch.
define void @wrong_align_store(ptr %A, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @wrong_align_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 [[B:%.*]], ptr [[A:%.*]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  store i32 %B, ptr %A, align 4
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A, align 8
  br label %ret.end

ret.end:
  ret void
}

; The conditional store asks for align 8 while the alloca and the preceding
; load only have align 4; the expected output keeps the branch.
define void @wrong_align_load(i32 %C, i32 %D) {
; CHECK-LABEL: @wrong_align_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[D:%.*]], 42
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RET_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[C:%.*]], ptr [[A]], align 8
; CHECK-NEXT:    br label [[RET_END]]
; CHECK:       ret.end:
; CHECK-NEXT:    ret void
;
entry:
  %A = alloca i32, align 4
  load i32, ptr %A, align 4
  %cmp = icmp sgt i32 %D, 42
  br i1 %cmp, label %if.then, label %ret.end

if.then:
  store i32 %C, ptr %A, align 8
  br label %ret.end

ret.end:
  ret void
}

; Branch weights attached to the conditional branch in @ifconvertstore.
; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
!0 = !{!"branch_weights", i32 3, i32 5}