1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s 3; PR4323 4 5target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 6 7; Several cases where tail call elimination should move the load above the call, 8; then eliminate the tail recursion. 9 10 11 12@global = external global i32 ; <ptr> [#uses=1] 13@extern_weak_global = extern_weak global i32 ; <ptr> [#uses=1] 14 15 16; This load can be moved above the call because the function won't write to it 17; and the call has no side effects. 18define fastcc i32 @raise_load_1(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn { 19; CHECK-LABEL: @raise_load_1( 20; CHECK-NEXT: entry: 21; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 22; CHECK: tailrecurse: 23; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ] 24; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ] 25; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]] 26; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]] 27; CHECK: if: 28; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 29; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 30; CHECK: else: 31; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 32; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ARG:%.*]], align 4 33; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]] 34; CHECK-NEXT: br label [[TAILRECURSE]] 35; 36entry: 37 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 38 br i1 %tmp2, label %if, label %else 39 40if: ; preds = %entry 41 ret i32 0 42 43else: ; preds = %entry 44 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 45 %tmp8 = call fastcc i32 @raise_load_1(ptr %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1] 46 %tmp9 = load i32, ptr %a_arg ; <i32> [#uses=1] 47 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1] 48 ret i32 %tmp10 49} 50 51 52; This load can be moved above the call because the function won't write to it 53; and the load provably can't trap. 54define fastcc i32 @raise_load_2(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) readonly { 55; CHECK-LABEL: @raise_load_2( 56; CHECK-NEXT: entry: 57; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 58; CHECK: tailrecurse: 59; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ] 60; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ] 61; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]] 62; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]] 63; CHECK: if: 64; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 65; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 66; CHECK: else: 67; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq ptr [[A_ARG:%.*]], null 68; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]] 69; CHECK: unwind: 70; CHECK-NEXT: unreachable 71; CHECK: recurse: 72; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 73; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr @global, align 4 74; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]] 75; CHECK-NEXT: br label [[TAILRECURSE]] 76; 77entry: 78 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 79 br i1 %tmp2, label %if, label %else 80 81if: ; preds = %entry 82 ret i32 0 83 84else: ; preds = %entry 85 %nullcheck = icmp eq ptr %a_arg, null ; <i1> [#uses=1] 86 br i1 %nullcheck, label %unwind, label %recurse 87 88unwind: ; preds = %else 89 unreachable 90 91recurse: ; preds = %else 92 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 93 %tmp8 = call fastcc i32 @raise_load_2(ptr %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1] 94 %tmp9 = load i32, ptr @global ; <i32> [#uses=1] 95 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1] 96 ret i32 %tmp10 97} 98 99 100; This load can be safely moved above the call (even though it's from an 101; extern_weak global) because the call has no side effects. 102define fastcc i32 @raise_load_3(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly willreturn { 103; CHECK-LABEL: @raise_load_3( 104; CHECK-NEXT: entry: 105; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 106; CHECK: tailrecurse: 107; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ] 108; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ] 109; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]] 110; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]] 111; CHECK: if: 112; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 113; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 114; CHECK: else: 115; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 116; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr @extern_weak_global, align 4 117; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]] 118; CHECK-NEXT: br label [[TAILRECURSE]] 119; 120entry: 121 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 122 br i1 %tmp2, label %if, label %else 123 124if: ; preds = %entry 125 ret i32 0 126 127else: ; preds = %entry 128 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 129 %tmp8 = call fastcc i32 @raise_load_3(ptr %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1] 130 %tmp9 = load i32, ptr @extern_weak_global ; <i32> [#uses=1] 131 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1] 132 ret i32 %tmp10 133} 134 135 136; The second load can be safely moved above the call even though it's from an 137; unknown pointer (which normally means it might trap) because the first load 138; proves it doesn't trap. 139define fastcc i32 @raise_load_4(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) readonly { 140; CHECK-LABEL: @raise_load_4( 141; CHECK-NEXT: entry: 142; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 143; CHECK: tailrecurse: 144; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[RECURSE:%.*]] ] 145; CHECK-NEXT: [[A_LEN_ARG_TR:%.*]] = phi i32 [ [[A_LEN_ARG:%.*]], [[ENTRY]] ], [ [[FIRST:%.*]], [[RECURSE]] ] 146; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[RECURSE]] ] 147; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG_TR]] 148; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE:%.*]] 149; CHECK: if: 150; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 151; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 152; CHECK: else: 153; CHECK-NEXT: [[NULLCHECK:%.*]] = icmp eq ptr [[A_ARG:%.*]], null 154; CHECK-NEXT: br i1 [[NULLCHECK]], label [[UNWIND:%.*]], label [[RECURSE]] 155; CHECK: unwind: 156; CHECK-NEXT: unreachable 157; CHECK: recurse: 158; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 159; CHECK-NEXT: [[FIRST]] = load i32, ptr [[A_ARG]], align 4 160; CHECK-NEXT: [[SECOND:%.*]] = load i32, ptr [[A_ARG]], align 4 161; CHECK-NEXT: [[TMP10]] = add i32 [[SECOND]], [[ACCUMULATOR_TR]] 162; CHECK-NEXT: br label [[TAILRECURSE]] 163; 164entry: 165 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 166 br i1 %tmp2, label %if, label %else 167 168if: ; preds = %entry 169 ret i32 0 170 171else: ; preds = %entry 172 %nullcheck = icmp eq ptr %a_arg, null ; <i1> [#uses=1] 173 br i1 %nullcheck, label %unwind, label %recurse 174 175unwind: ; preds = %else 176 unreachable 177 178recurse: ; preds = %else 179 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 180 %first = load i32, ptr %a_arg ; <i32> [#uses=1] 181 %tmp8 = call fastcc i32 @raise_load_4(ptr %a_arg, i32 %first, i32 %tmp7) ; <i32> [#uses=1] 182 %second = load i32, ptr %a_arg ; <i32> [#uses=1] 183 %tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1] 184 ret i32 %tmp10 185} 186 187; This load can be moved above the call because the function won't write to it 188; and the a_arg is dereferenceable. 189define fastcc i32 @raise_load_5(ptr dereferenceable(4) align 4 %a_arg, i32 %a_len_arg, i32 %start_arg) readonly nofree nosync { 190; CHECK-LABEL: @raise_load_5( 191; CHECK-NEXT: entry: 192; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 193; CHECK: tailrecurse: 194; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ] 195; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ] 196; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]] 197; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]] 198; CHECK: if: 199; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 200; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 201; CHECK: else: 202; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 203; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ARG:%.*]], align 4 204; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]] 205; CHECK-NEXT: br label [[TAILRECURSE]] 206; 207entry: 208 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 209 br i1 %tmp2, label %if, label %else 210 211if: ; preds = %entry 212 ret i32 0 213 214else: ; preds = %entry 215 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 216 %tmp8 = call fastcc i32 @raise_load_5(ptr %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1] 217 %tmp9 = load i32, ptr %a_arg ; <i32> [#uses=1] 218 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1] 219 ret i32 %tmp10 220} 221 222; This load can be moved above the call because the function call does not write to the memory the load 223; is accessing and the load is safe to speculate. 224define fastcc i32 @raise_load_6(ptr %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind { 225; CHECK-LABEL: @raise_load_6( 226; CHECK-NEXT: entry: 227; CHECK-NEXT: [[S:%.*]] = alloca i32, align 4 228; CHECK-NEXT: br label [[TAILRECURSE:%.*]] 229; CHECK: tailrecurse: 230; CHECK-NEXT: [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP10:%.*]], [[ELSE:%.*]] ] 231; CHECK-NEXT: [[START_ARG_TR:%.*]] = phi i32 [ [[START_ARG:%.*]], [[ENTRY]] ], [ [[TMP7:%.*]], [[ELSE]] ] 232; CHECK-NEXT: store i32 4, ptr [[S]], align 4 233; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i32 [[START_ARG_TR]], [[A_LEN_ARG:%.*]] 234; CHECK-NEXT: br i1 [[TMP2]], label [[IF:%.*]], label [[ELSE]] 235; CHECK: if: 236; CHECK-NEXT: store i32 1, ptr [[A_ARG:%.*]], align 4 237; CHECK-NEXT: [[ACCUMULATOR_RET_TR:%.*]] = add i32 0, [[ACCUMULATOR_TR]] 238; CHECK-NEXT: ret i32 [[ACCUMULATOR_RET_TR]] 239; CHECK: else: 240; CHECK-NEXT: [[TMP7]] = add i32 [[START_ARG_TR]], 1 241; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[S]], align 4 242; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[ACCUMULATOR_TR]] 243; CHECK-NEXT: br label [[TAILRECURSE]] 244; 245entry: 246 %s = alloca i32 247 store i32 4, ptr %s 248 %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1] 249 br i1 %tmp2, label %if, label %else 250 251if: ; preds = %entry 252 store i32 1, ptr %a_arg 253 ret i32 0 254 255else: ; preds = %entry 256 %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1] 257 %tmp8 = call fastcc i32 @raise_load_6(ptr %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1] 258 %tmp9 = load i32, ptr %s ; <i32> [#uses=1] 259 %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1] 260 ret i32 %tmp10 261} 262