; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

@gp = global ptr null, align 8

declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0)

define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp = icmp ne ptr %m, %lgp
  ret i1 %cmp
}


; Although %m is marked nocapture in the deopt operand of the call to @f, we
; cannot remove the allocation site (the call to malloc).
; The comparison should fold to false irrespective of whether the call to
; malloc can be elided or not.
declare void @f()
define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above with a deopt operand on the call to @f, but the
; comparison is NE.
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}

; Same function as above, but the global is not marked nonnull, so we cannot
; fold the comparison.
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[CMP:%.*]] = icmp ne ptr [[LGP]], [[M]]
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp
  %cmp = icmp ne ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}
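; A hypothetical EQ twin of the test above (illustrative sketch only; CHECK
; lines omitted rather than autogenerated): with no !nonnull on the load, both
; %m (malloc may fail) and %lgp may be null, and null == null would make the
; compare true, so this direction cannot be folded either.
define i1 @compare_eq_global_maybe_null_sketch() {
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp
  %cmp = icmp eq ptr %lgp, %m
  tail call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}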
; FIXME: The comparison should fold to false since %m escapes (via the call to
; @escape) only after the comparison.
declare void @escape(ptr)
define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(24) ptr @malloc(i64 24)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0:![0-9]+]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK:       escape_call:
; CHECK-NEXT:    call void @escape(ptr [[M]])
; CHECK-NEXT:    ret i1 true
; CHECK:       just_return:
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 24)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  br i1 %cmp, label %escape_call, label %just_return

escape_call:
  call void @escape(ptr %m)
  ret i1 true

just_return:
  ret i1 %cmp
}

define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The compare is folded to true since the folding compare looks through
; bitcasts. The call to malloc and the bitcast instructions are then elided,
; since there are no remaining uses of the malloc.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  ret i1 %cmp
}

; The compare is folded to true since the folding compare looks through
; bitcasts. The malloc call for %m cannot be elided since it is used in the
; call to @f.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %m
  call void @f() [ "deopt"(ptr %m) ]
  ret i1 %cmp
}
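; A hypothetical variant of the same-pointer tests (illustrative sketch only;
; CHECK lines omitted rather than autogenerated): a zero-offset GEP of %m
; simplifies back to %m itself, so this compare should fold to true in the
; same way.
define i1 @compare_samepointer_under_gep_sketch() {
  %m = call ptr @malloc(i64 4)
  %g = getelementptr i8, ptr %m, i64 0
  %cmp = icmp eq ptr %m, %g
  ret i1 %cmp
}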
; Technically, we can fold the %cmp2 comparison even though %m escapes through
; the ret statement, since `ret` terminates the function and we cannot reach
; the compare from the ret.
; FIXME: Folding %cmp2 when %m escapes through the ret could be an issue with
; cross-thread data dependencies, since we do not make the distinction between
; atomic and non-atomic loads in capture tracking.
define ptr @compare_ret_escape(ptr %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[N]], [[C:%.*]]
; CHECK-NEXT:    br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK:       retst:
; CHECK-NEXT:    ret ptr [[M]]
; CHECK:       chk:
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8, !nonnull [[META0]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK:       chk2:
; CHECK-NEXT:    ret ptr [[N]]
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %cmp = icmp eq ptr %n, %c
  br i1 %cmp, label %retst, label %chk

retst:
  ret ptr %m

chk:
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp2 = icmp eq ptr %m, %lgp
  br i1 %cmp2, label %retst, label %chk2

chk2:
  ret ptr %n
}

; The malloc call for %m cannot be elided since it is used in the call to @f.
; However, the compare can be folded to true since %n doesn't escape and
; %m and %n are distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    tail call void @f() [ "deopt"(ptr [[M]]) ]
; CHECK-NEXT:    ret i1 true
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  tail call void @f() [ "deopt"(ptr %m) ]
  %cmp = icmp ne ptr %m, %n
  ret i1 %cmp
}

; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we cannot simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way. This is
; the same point that applies to allocas, applied to noalias/malloc.
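; To make the hazard concrete (illustrative fragment, not a checked test):
; folding two compares of the same malloc result against the same captured
; address under different assumed layouts would let an observer see
; contradictory answers about a single allocation:
;   %cmp1 = icmp eq ptr %m, %p   ; folded to false assuming one placement of %m
;   %cmp2 = icmp eq ptr %m, %p   ; folded to true assuming another placement
; Each fold is individually justifiable, but together they are inconsistent.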
; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare ptr @hidden_inttoptr()
declare ptr @hidden_offset(ptr %other)

; FIXME: Missed opportunity.
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

declare void @witness(i1, i1)

define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %rhs = inttoptr i64 2048 to ptr
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[RHS:%.*]] = getelementptr i8, ptr [[N]], i64 4
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_offset(ptr [[N]])
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[RHS]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %rhs2 = call ptr @hidden_offset(ptr %n)
  %cmp1 = icmp eq ptr %m, %rhs
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[LGP:%.*]] = load ptr, ptr @gp, align 8
; CHECK-NEXT:    [[RHS2:%.*]] = call ptr @hidden_inttoptr()
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq ptr [[M]], [[LGP]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr [[M]], [[RHS2]]
; CHECK-NEXT:    call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %rhs2 = call ptr @hidden_inttoptr()
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}
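; For intuition (illustrative, not part of the checked tests): @hidden_inttoptr
; could be implemented in another translation unit as
;   define ptr @hidden_inttoptr() {
;     ret ptr inttoptr (i64 2048 to ptr)
;   }
; in which case %cmp1 and %cmp2 in @neg_consistent_fold1 compare %m against the
; same forged address and must be folded consistently or not at all.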
; FIXME: This appears correct, but the current implementation relies on
; visiting both compares in the same pass. We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT:    call void @witness(i1 false, i1 false)
; CHECK-NEXT:    ret void
;
  %m = call ptr @malloc(i64 4)
  %lgp = load ptr, ptr @gp, align 8
  %cmp1 = icmp eq ptr %m, %lgp
  %cmp2 = icmp eq ptr %m, %lgp
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

declare void @unknown(ptr)

; These tests point out that a nocapture call can't cause a consistent-result
; issue, as it is (by assumption) unable to contain a comparison which might
; capture the address.

define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[M]], inttoptr (i64 2048 to ptr)
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %rhs = inttoptr i64 2048 to ptr
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %n = call ptr @malloc(i64 4)
  %rhs = getelementptr i8, ptr %n, i32 4
  %cmp = icmp eq ptr %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT:    [[M:%.*]] = call dereferenceable_or_null(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr captures(none) [[M]])
; CHECK-NEXT:    ret i1 false
;
  %m = call ptr @malloc(i64 4)
  call void @unknown(ptr nocapture %m)
  %lgp = load ptr, ptr @gp, align 8, !nonnull !0
  %cmp = icmp eq ptr %m, %lgp
  ret i1 %cmp
}

; End consistent heap layout tests

; We can fold this by assuming a single heap layout
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}

; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    call void @unknown(ptr nonnull [[N]])
; CHECK-NEXT:    ret i1 false
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  call void @unknown(ptr %n)
  %cmp = icmp eq ptr %m, %n
  ret i1 %cmp
}
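; For intuition (illustrative, not part of the checked tests): since %m never
; escapes, the optimizer may place it at any address distinct from %n's, e.g.
;   %m -> 0x1000, %n -> 0x2000  =>  icmp eq ptr %m, %n  is  false
; and no observer of %n's captured address can contradict that choice.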
; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT:    [[M:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[N:%.*]] = call nonnull dereferenceable(4) ptr @malloc(i64 4)
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds nuw i8, ptr [[M]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds nuw i8, ptr [[N]], i64 2
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]]
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %m = call nonnull ptr @malloc(i64 4)
  %n = call nonnull ptr @malloc(i64 4)
  %gep1 = getelementptr i8, ptr %m, i32 1
  %gep2 = getelementptr i8, ptr %n, i32 2
  %cmp = icmp eq ptr %gep1, %gep2
  ret i1 %cmp
}

!0 = !{}