; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s

;; Basic case: check masked.load/store is generated for i16/i32/i64.
;; All three loads and all three stores in %if.true are expected to be
;; rewritten as single-element masked operations predicated on %cond, leaving
;; no conditional branch behind.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @basic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
; CHECK-NEXT:    call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.false:
  br label %if.end

if.true:
  %0 = load i16, ptr %p, align 2
  %1 = load i32, ptr %q, align 4
  %2 = load i64, ptr %b, align 8
  store i16 %0, ptr %b, align 2
  store i32 %1, ptr %p, align 4
  store i64 %2, ptr %q, align 8
  br label %if.false

if.end:
  ret void
}

;;
;; Successor 1 branches to successor 0.
;; The hoisted block is the false successor of the branch, so the expected
;; mask is the negation (xor ..., true) of the branch condition.
define void @succ1to0(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @succ1to0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT:    ret void
;
entry:
  %tobool = icmp ne i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

if.end:
  ret void

if.then:
  %0 = load i32, ptr %q
  store i32 %0, ptr %p
  br label %if.end
}

;; Successor 1 branches to successor 0 and there is a phi node.
;; The phi's other incoming value (0) is expected to become the passthrough
;; operand of the masked.load, so no select or phi remains.
define i32 @succ1to0_phi(ptr %p) {
; CHECK-LABEL: @succ1to0_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
entry:
  %cond = icmp eq ptr %p, null
  br i1 %cond, label %if.true, label %if.false

if.false:
  %0 = load i32, ptr %p
  br label %if.true

if.true:
  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
  ret i32 %res
}

;; Successor 0 branches to successor 1.
;; The load/store pair in %if.true is expected to be predicated directly on
;; %cond; the store in %if.false stays unconditional.
define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @succ0to1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cond = icmp eq i32 %a, 0
  br i1 %cond, label %if.true, label %if.false

if.false:
  store i32 1, ptr %q
  br label %if.end

if.true:
  %0 = load i32, ptr %b
  store i32 %0, ptr %p
  br label %if.false

if.end:
  ret void
}

;; Load after store can be hoisted.
;; The masked.store is expected before the masked.load (original order is
;; kept), and the two returns are expected to merge into a select on %cond.
define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
; CHECK-LABEL: @load_after_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
; CHECK-NEXT:    ret i64 [[SPEC_SELECT]]
;
entry:
  %cond = icmp eq i32 %a, 0
  br i1 %cond, label %if.true, label %if.end

if.true:
  store i32 1, ptr %b
  %0 = load i16, ptr %p
  %zext = zext i16 %0 to i64
  ret i64 %zext

if.end:
  ret i64 0
}

;; Speculatable memory read doesn't prevent the hoist.
;; The call to @read_memory_only is expected to be moved into the entry block
;; unchanged; only the store that uses its result is masked.
define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
; CHECK-LABEL: @load_skip_speculatable_memory_read(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT:    [[READ:%.*]] = call i32 @read_memory_only()
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %cond = icmp eq i32 %a, 0
  br i1 %cond, label %if.true, label %if.false

if.false:
  store i32 1, ptr %q
  br label %if.end

if.true:
  %read = call i32 @read_memory_only()
  store i32 %read, ptr %p
  br label %if.false

if.end:
  ret void
}

;; Source of the load can be a GEP.
;; The GEP is expected to be moved ahead of the masked.load, which then uses
;; it as its address; the phi's constant 0 becomes the passthrough.
define i32 @load_from_gep(ptr %p) {
; CHECK-LABEL: @load_from_gep(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[TMP3]]
;
entry:
  %cond = icmp eq ptr %p, null
  br i1 %cond, label %if.true, label %if.false

if.false:
  %arrayidx = getelementptr inbounds i8, ptr %p, i64 16
  %0 = load i32, ptr %arrayidx
  br label %if.true

if.true:
  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
  ret i32 %res
}

;; Metadata range/annotation are kept.
;; The load's !range is expected to show up as a range(...) attribute on the
;; masked.load call, and !annotation is expected on the masked calls that
;; replace the annotated load and store.
define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
; CHECK-LABEL: @nondebug_metadata(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT:    [[TMP1:%.*]] = call range(i16 0, 10) <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META5:![0-9]+]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META5]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.false:
  ret void

if.true:
  %0 = load i16, ptr %p, align 2, !range !{i16 0, i16 10}
  %1 = load i32, ptr %q, align 4, !annotation !11
  store i16 %0, ptr %q, align 4, !annotation !11
  store i32 %1, ptr %p, align 2
  br label %if.false
}

;; The conditional store carries !DIAssignID and is linked to an
;; llvm.dbg.assign in the join block; the expected masked.store carries no
;; such attachment, and the phi is expected to become a select.
define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
; CHECK-LABEL: @debug_metadata_diassign(
; CHECK-NEXT:  bb0:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
; CHECK-NEXT:    ret i16 [[SPEC_SELECT]]
;
bb0:
  br i1 %cond, label %if.true, label %if.false

if.true:
  store i16 7, ptr %p, align 4, !DIAssignID !9
  br label %if.false

if.false:
  %ret = phi i16 [ 2, %bb0 ], [ 3, %if.true ]
  call void @llvm.dbg.assign(metadata i16 %ret, metadata !8, metadata !DIExpression(), metadata !9, metadata ptr %p, metadata !DIExpression()), !dbg !7
  ret i16 %ret
}

;; Not crash when working with opt controlled by simplifycfg-hoist-cond-stores.
;; Expected: the i32 store becomes a masked.store, while the trailing i1 store
;; is speculated through a select.
define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
; CHECK-LABEL: @hoist_cond_stores(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i1 false, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
; CHECK-NEXT:    store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
; CHECK-NEXT:    ret i32 0
;
entry:
  store i1 false, ptr %p, align 2
  br i1 %cond, label %if.true, label %if.false

if.true:                                          ; preds = %entry
  store i32 0, ptr %p, align 8
  store i1 false, ptr %p, align 2
  br label %if.false

if.false:                                         ; preds = %if.true, %entry
  ret i32 0
}

;; Both of successor 0 and successor 1 have a single predecessor.
;; Both blocks are hoisted: the store from %if.end is predicated on %tobool,
;; the load/store from %if.then on its negation, and the two returns merge
;; into a select. As in the other load-then-store tests in this file, the
;; loaded scalar is re-wrapped with a bitcast to <1 x i32> before being passed
;; to masked.store.
define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
; CHECK-NEXT:    ret i32 [[DOT]]
;
entry:
  %tobool = icmp ne i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

if.end:
  store i32 1, ptr %q
  ret i32 2

if.then:
  %0 = load i32, ptr %q
  store i32 %0, ptr %p
  ret i32 3
}

;; Hoist 6 stores.
define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
; CHECK-LABEL: @threshold_6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.true:
  store i32 1, ptr %p1, align 4
  store i32 2, ptr %p2, align 4
  store i32 3, ptr %p3, align 4
  store i32 4, ptr %p4, align 4
  store i32 5, ptr %p5, align 4
  store i32 6, ptr %p6, align 4
  br label %if.false

if.false:
  ret void
}

;; Not hoist 7 stores.
;; Same shape as @threshold_6 but with one more store; the expected output
;; keeps the conditional branch and the plain stores.
define void @threshold_7(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
; CHECK-LABEL: @threshold_7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.true:
; CHECK-NEXT:    store i32 1, ptr [[P1:%.*]], align 4
; CHECK-NEXT:    store i32 2, ptr [[P2:%.*]], align 4
; CHECK-NEXT:    store i32 3, ptr [[P3:%.*]], align 4
; CHECK-NEXT:    store i32 4, ptr [[P4:%.*]], align 4
; CHECK-NEXT:    store i32 5, ptr [[P5:%.*]], align 4
; CHECK-NEXT:    store i32 6, ptr [[P6:%.*]], align 4
; CHECK-NEXT:    store i32 7, ptr [[P7:%.*]], align 4
; CHECK-NEXT:    br label [[IF_FALSE]]
; CHECK:       if.false:
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.true:
  store i32 1, ptr %p1, align 4
  store i32 2, ptr %p2, align 4
  store i32 3, ptr %p3, align 4
  store i32 4, ptr %p4, align 4
  store i32 5, ptr %p5, align 4
  store i32 6, ptr %p6, align 4
  store i32 7, ptr %p7, align 4
  br label %if.false

if.false:
  ret void
}

;; Not do hoist if the cost of instructions to be hoisted is expensive.
;; %if.false contains a udiv, two muls and a select besides its stores. The
;; expected output keeps the conditional branch and all plain loads/stores;
;; only the two returns are merged into %common.ret.
define i32 @not_cheap_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
; CHECK-LABEL: @not_cheap_to_hoist(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       common.ret:
; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[IF_TRUE]] ]
; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
; CHECK:       if.false:
; CHECK-NEXT:    store i64 1, ptr [[P:%.*]], align 8
; CHECK-NEXT:    store i16 2, ptr [[Q:%.*]], align 2
; CHECK-NEXT:    [[V:%.*]] = udiv i32 [[A]], 12345
; CHECK-NEXT:    [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
; CHECK-NEXT:    [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
; CHECK-NEXT:    [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
; CHECK:       if.true:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P]], align 4
; CHECK-NEXT:    br label [[COMMON_RET]]
;
entry:
  %cond = icmp eq i32 %a, 0
  br i1 %cond, label %if.true, label %if.false

if.false:
  store i64 1, ptr %p
  store i16 2, ptr %q

  %v = udiv i32 %a, 12345
  %vv = mul i32 %v, %v0
  %vvv = mul i32 %vv, %v1
  %vvvv = select i1 %cc, i32 %v2, i32 %vvv
  ret i32 %vvvv

if.true:
  %0 = load i32, ptr %b
  store i32 %0, ptr %p
  br label %if.end

if.end:
  ret i32 0
}

;; Not hoist if there is more than 1 predecessor.
;; %if.then is reached from both %entry and %if.end (the two blocks branch to
;; each other), so the expected output is unchanged.
define void @not_single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_single_predecessor(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK:       if.end:
; CHECK-NEXT:    br label [[IF_THEN]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    br label [[IF_END]]
;
entry:
  %tobool = icmp ne i32 %a, 0
  br i1 %tobool, label %if.end, label %if.then

if.end:
  br label %if.then

if.then:
  %1 = load i32, ptr %q
  store i32 %1, ptr %p
  br label %if.end
}

;; Not hoist b/c i8 is not supported by conditional faulting.
define void @not_supported_type(i8 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @not_supported_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8 [[A:%.*]], 0
; CHECK-NEXT:    br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.false:
; CHECK-NEXT:    store i8 1, ptr [[Q:%.*]], align 1
; CHECK-NEXT:    br label [[IF_END:%.*]]
; CHECK:       if.true:
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[B:%.*]], align 1
; CHECK-NEXT:    store i8 [[TMP0]], ptr [[P:%.*]], align 1
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
entry:
  %cond = icmp eq i8 %a, 0
  br i1 %cond, label %if.true, label %if.false

if.false:
  store i8 1, ptr %q
  br label %if.end

if.true:
  %0 = load i8, ptr %b
  store i8 %0, ptr %p
  br label %if.end

if.end:
  ret void
}

;; Not hoist if the terminator is not br.
;; The blocks holding the loads/stores are reached through a switch, and the
;; expected output is unchanged.
define void @not_br_terminator(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: @not_br_terminator(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    switch i32 [[A:%.*]], label [[IF_END:%.*]] [
; CHECK-NEXT:      i32 1, label [[IF_FALSE:%.*]]
; CHECK-NEXT:      i32 2, label [[IF_TRUE:%.*]]
; CHECK-NEXT:    ]
; CHECK:       if.false:
; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.true:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    br label [[IF_FALSE]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
entry:
  switch i32 %a, label %if.end [
    i32 1, label %if.false
    i32 2, label %if.true
  ]

if.false:
  store i32 1, ptr %q, align 4
  br label %if.end

if.true:
  %0 = load i32, ptr %b, align 4
  store i32 %0, ptr %p, align 4
  br label %if.false

if.end:
  ret void
}

;; Not hoist if the instruction to be hoisted is atomic.
define void @not_atomic(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.false:
; CHECK-NEXT:    store atomic i32 1, ptr [[P:%.*]] seq_cst, align 4
; CHECK-NEXT:    br label [[IF_TRUE]]
; CHECK:       if.true:
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.false:
  store atomic i32 1, ptr %p seq_cst, align 4
  br label %if.true

if.true:
  ret void
}

;; Not hoist if the instruction to be hoisted is volatile.
;; The only instruction in the conditional block is a volatile load, which is
;; expected to stay in place behind the branch.
define void @not_volatile(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.false:
; CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    br label [[IF_TRUE]]
; CHECK:       if.true:
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.false:
  %0 = load volatile i32, ptr %p, align 4
  br label %if.true

if.true:
  ret void
}

;; Not hoist if there is an instruction that has side effect in the same bb.
;; Here the side-effecting instruction is an atomicrmw that precedes the plain
;; store.
define void @not_hoistable_sideeffect(i1 %cond, ptr %p, ptr %q) {
; CHECK-LABEL: @not_hoistable_sideeffect(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.false:
; CHECK-NEXT:    [[RMW:%.*]] = atomicrmw xchg ptr [[Q:%.*]], double 4.000000e+00 seq_cst, align 8
; CHECK-NEXT:    store i32 1, ptr [[P:%.*]], align 4
; CHECK-NEXT:    br label [[IF_TRUE]]
; CHECK:       if.true:
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.false:
  %rmw= atomicrmw xchg ptr %q, double 4.0 seq_cst
  store i32 1, ptr %p, align 4
  br label %if.true

if.true:
  ret void
}

;; Not hoist if the branch is predictable and the `then` BB is not likely to execute.
;; The branch carries !prof !10, whose branch weights give %if.then a weight
;; of 1 against 99 for %if.end; the expected output keeps the branch and its
;; !prof attachment.
define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_likely_to_execute(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF6:![0-9]+]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    br label [[IF_END]]
;
entry:
  %tobool = icmp ne i32 %a, 0
  br i1 %tobool, label %if.then, label %if.end, !prof !10

if.end:
  ret void

if.then:
  %0 = load i32, ptr %q
  store i32 %0, ptr %p
  br label %if.end
}

;; Now the optimization hoist-loads-stores-with-cond-faulting is run in codegen,
;; which is after sroa and alloca is optimized away. So we don't need to do the transform
;; for this case. But in the future, it is probably moved before sroa.
;; %if.then reads its pointers back from the allocas %q.addr and %p.addr
;; before the i32 load/store; the expected output is unchanged.
define void @not_alloca(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @not_alloca(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT:    [[Q_ADDR:%.*]] = alloca ptr, align 8
; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
; CHECK-NEXT:    store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
; CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
; CHECK-NEXT:    store i32 [[TMP2]], ptr [[TMP3]], align 4
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    ret void
;
entry:
  %p.addr = alloca ptr
  %q.addr = alloca ptr
  %a.addr = alloca i32
  store ptr %p, ptr %p.addr
  store ptr %q, ptr %q.addr
  store i32 %a, ptr %a.addr
  %0 = load i32, ptr %a.addr
  %tobool = icmp ne i32 %0, 0
  br i1 %tobool, label %if.then, label %if.end

if.then:
  %1 = load ptr, ptr %q.addr
  %2 = load i32, ptr %1
  %3 = load ptr, ptr %p.addr
  store i32 %2, ptr %3
  br label %if.end

if.end:
  ret void
}

;; Not transform if alignment = 2^32.
;; The store's alignment, 4294967296, does not fit in the i32 alignment
;; operand used by the masked intrinsics elsewhere in this file.
define void @not_maximum_alignment(i1 %cond, ptr %p) {
; CHECK-LABEL: @not_maximum_alignment(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK:       if.true:
; CHECK-NEXT:    store i32 0, ptr [[P:%.*]], align 4294967296
; CHECK-NEXT:    br label [[IF_FALSE]]
; CHECK:       if.false:
; CHECK-NEXT:    ret void
;
entry:
  br i1 %cond, label %if.true, label %if.false

if.true:
  store i32 0, ptr %p, align 4294967296
  br label %if.false

if.false:
  ret void
}

;; The phi in %bb3 has three incoming values. The load from %bb2 is expected
;; to become a masked.load in %bb1 predicated on %cond2, with the phi's
;; constant 0 as passthrough, leaving a two-input phi.
define i32 @succ_phi_has_3input(i1 %cond1, ptr %p, i1 %cond2) {
; CHECK-LABEL: @succ_phi_has_3input(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
; CHECK:       bb1:
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
; CHECK-NEXT:    br label [[BB3]]
; CHECK:       bb3:
; CHECK-NEXT:    [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
; CHECK-NEXT:    store i64 [[Y]], ptr [[P]], align 8
; CHECK-NEXT:    ret i32 0
;
entry:
  br i1 %cond1, label %bb3, label %bb1

bb1:                                              ; preds = %entry
  br i1 %cond2, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  %x = load i64, ptr %p, align 8
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1, %entry
  %y = phi i64 [ %x, %bb2 ], [ 0, %bb1 ], [ 0, %entry ]
  store i64 %y, ptr %p, align 8
  ret i32 0
}

;; The loaded value feeds both a phi and a store in the same block: a
;; masked.load with passthrough 0 and a masked.store of the loaded value are
;; both expected.
define i32 @succ1to0_phi2(ptr %p, ptr %p2) {
; CHECK-LABEL: @succ1to0_phi2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
entry:
  %cond = icmp eq ptr %p, null
  br i1 %cond, label %if.true, label %if.false

if.false:
  %0 = load i32, ptr %p
  store i32 %0, ptr %p2
  br label %if.true

if.true:
  %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
  ret i32 %res
}

;; Two phis use the loaded value: the expected masked.load takes %x (the
;; incoming value of %res1) as its passthrough, and %res0 becomes a select.
define i32 @succ1to0_phi3(ptr %p, ptr %p2, i32 %x) {
; CHECK-LABEL: @succ1to0_phi3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[COND]], true
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[X:%.*]] to <1 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[SPEC_SELECT]], [[TMP4]]
; CHECK-NEXT:    ret i32 [[RES]]
;
entry:
  %cond = icmp eq ptr %p, null
  br i1 %cond, label %if.true, label %if.false

if.false:
  %0 = load i32, ptr %p
  store i32 %0, ptr %p2
  br label %if.true

if.true:
  %res0 = phi i32 [ %0, %if.false ], [ 0, %entry ]
  %res1 = phi i32 [ %0, %if.false ], [ %x, %entry ]
  %res = add i32 %res0, %res1
  ret i32 %res
}

;; Not transform if either BB has multiple successors.
;; %entry.if, which holds the load, ends in a switch; the expected output
;; keeps the plain load and only merges the returns into %common.ret.
define i32 @not_multi_successors(i1 %c1, i32 %c2, ptr %p) {
; CHECK-LABEL: @not_multi_successors(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]]
; CHECK:       entry.if:
; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4
; CHECK-NEXT:    switch i32 [[C2:%.*]], label [[COMMON_RET]] [
; CHECK-NEXT:      i32 0, label [[SW_BB:%.*]]
; CHECK-NEXT:      i32 1, label [[SW_BB]]
; CHECK-NEXT:    ]
; CHECK:       common.ret:
; CHECK-NEXT:    [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ]
; CHECK-NEXT:    ret i32 [[COMMON_RET_OP]]
; CHECK:       sw.bb:
; CHECK-NEXT:    br label [[COMMON_RET]]
;
entry:
  br i1 %c1, label %entry.if, label %entry.else

entry.if:                                         ; preds = %entry
  %val = load i32, ptr %p, align 4
  switch i32 %c2, label %return [
    i32 0, label %sw.bb
    i32 1, label %sw.bb
  ]

entry.else:                                       ; preds = %entry
  ret i32 0

sw.bb:                                            ; preds = %entry.if, %entry.if
  br label %return

return:                                           ; preds = %sw.bb, %entry.if
  %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ]
  ret i32 %ret
}

;; Callee used by @load_skip_speculatable_memory_read.
declare i32 @read_memory_only() readonly nounwind willreturn speculatable

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
!llvm.ident = !{!4}

;; !0-!9: debug info referenced by @debug_metadata_diassign.
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "clang")
!1 = !DIFile(filename: "foo.c", directory: "/tmp")
!2 = !{i32 2, !"Dwarf Version", i32 4}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{!"clang"}
!5 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
!7 = !DILocation(line: 5, column: 7, scope: !6)
!8 = !DILocalVariable(name: "a", scope: !6, line: 6, type: !5)
!9 = distinct !DIAssignID()
;; !10: branch weights referenced by @not_likely_to_execute.
!10 = !{!"branch_weights", i32 1, i32 99}
;; !11: !annotation payload referenced by @nondebug_metadata.
!11 = !{ !"auto-init" }