; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s

; Store-to-load forwarding across loop iterations when the store and the
; load disagree on the value type.
;
; Each function below has the same shape: iteration i stores to A[i+1] and
; then loads A[i], so the load reads what the previous iteration stored.
; Where forwarding is expected, the assertions show an initial load in the
; entry block, a "store_forwarded" phi in the loop header, and a
; "store_forward_cast" converting the stored value to the load's type. The
; original load instruction is still present in the expected output; only
; its user is rewritten to read the phi.

; If the store and the load use different types but have the same size,
; we should still be able to forward the value.
;
;   i64 i = 0;
;   do {
;     A[i+1] = B[i] + 2;
;     C[i] = (int)(((float*)A)[i] * 2);
;   } while (++i != N);

; Data layout shared by every function in this file. The expectations for
; @f3 (i64 forwarded to ptr) and @f5 (i32 not forwarded to ptr) depend on
; the pointer size in effect.
target datalayout = "e-m:o-p64:64:64-i64:64-f80:128-n8:16:32:64-S128"

; i32 store forwarded to a float load via bitcast.
define void @f(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-LABEL: @f(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load float, ptr [[A:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi float [ [[LOAD_INITIAL]], [[ENTRY:%.*]] ], [ [[STORE_FORWARD_CAST:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[BIDX]], align 4
; CHECK-NEXT:    [[A_P1:%.*]] = add i32 [[B]], 2
; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to float
; CHECK-NEXT:    store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
; CHECK-NEXT:    [[A:%.*]] = load float, ptr [[AIDX]], align 4
; CHECK-NEXT:    [[C:%.*]] = fmul float [[STORE_FORWARDED]], 2.000000e+00
; CHECK-NEXT:    [[C_INT:%.*]] = fptosi float [[C]] to i32
; CHECK-NEXT:    store i32 [[C_INT]], ptr [[CIDX]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  ; All three arrays are indexed with an i32 stride; A is addressed at both
  ; i+1 (for the store) and i (for the load).
  %Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
  %Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
  %Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv

  ; A[i+1] = B[i] + 2, stored as i32.
  %b = load i32, ptr %Bidx, align 4
  %a_p1 = add i32 %b, 2
  store i32 %a_p1, ptr %Aidx_next, align 4

  ; A[i] is read back as float, i.e. with a different type than the store.
  %a = load float, ptr %Aidx, align 4
  %c = fmul float %a, 2.0
  %c.int = fptosi float %c to i32
  store i32 %c.int, ptr %Cidx, align 4

  %exitcond = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Same as @f, but with two stores to A[i+1] in each iteration. The
; assertions expect the value of the second (last) store to be the one
; that is cast and forwarded to the load.
;
;   i64 i = 0;
;   do {
;     A[i+1] = B[i] + 2;
;     A[i+1] = B[i] + 3;
;     C[i] = (int)(((float*)A)[i] * 2);
;   } while (++i != N);

define void @f2(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-LABEL: @f2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load float, ptr [[A:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi float [ [[LOAD_INITIAL]], [[ENTRY:%.*]] ], [ [[STORE_FORWARD_CAST:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[BIDX]], align 4
; CHECK-NEXT:    [[A_P2:%.*]] = add i32 [[B]], 2
; CHECK-NEXT:    store i32 [[A_P2]], ptr [[AIDX_NEXT]], align 4
; CHECK-NEXT:    [[A_P3:%.*]] = add i32 [[B]], 3
; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P3]] to float
; CHECK-NEXT:    store i32 [[A_P3]], ptr [[AIDX_NEXT]], align 4
; CHECK-NEXT:    [[A:%.*]] = load float, ptr [[AIDX]], align 4
; CHECK-NEXT:    [[C:%.*]] = fmul float [[STORE_FORWARDED]], 2.000000e+00
; CHECK-NEXT:    [[C_INT:%.*]] = fptosi float [[C]] to i32
; CHECK-NEXT:    store i32 [[C_INT]], ptr [[CIDX]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  %Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
  %Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
  %Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv

  ; First store to A[i+1]; it is overwritten by the store that follows.
  %b = load i32, ptr %Bidx, align 4
  %a_p2 = add i32 %b, 2
  store i32 %a_p2, ptr %Aidx_next, align 4

  ; Second store to the same address.
  %a_p3 = add i32 %b, 3
  store i32 %a_p3, ptr %Aidx_next, align 4

  ; A[i] is read back as float.
  %a = load float, ptr %Aidx, align 4
  %c = fmul float %a, 2.0
  %c.int = fptosi float %c to i32
  store i32 %c.int, ptr %Cidx, align 4

  %exitcond = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Check that we can forward between pointer-sized integers and actual
; pointers. Here an i64 is stored and a ptr is loaded; the assertions
; expect the stored value to be converted with inttoptr.

define void @f3(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load ptr, ptr [[A:%.*]], align 8
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi ptr [ [[LOAD_INITIAL]], [[ENTRY:%.*]] ], [ [[STORE_FORWARD_CAST:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[AIDX_NEXT:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[BIDX:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[CIDX:%.*]] = getelementptr inbounds i64, ptr [[C:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[B:%.*]] = load i64, ptr [[BIDX]], align 8
; CHECK-NEXT:    [[A_P1:%.*]] = add i64 [[B]], 2
; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = inttoptr i64 [[A_P1]] to ptr
; CHECK-NEXT:    store i64 [[A_P1]], ptr [[AIDX_NEXT]], align 8
; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[AIDX]], align 8
; CHECK-NEXT:    [[C:%.*]] = getelementptr i8, ptr [[STORE_FORWARDED]], i64 57
; CHECK-NEXT:    [[C_I64P:%.*]] = ptrtoint ptr [[C]] to i64
; CHECK-NEXT:    store i64 [[C_I64P]], ptr [[CIDX]], align 8
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  ; Unlike @f, the arrays are indexed with an i64 stride here.
  %Aidx_next = getelementptr inbounds i64, ptr %A, i64 %indvars.iv.next
  %Bidx = getelementptr inbounds i64, ptr %B, i64 %indvars.iv
  %Cidx = getelementptr inbounds i64, ptr %C, i64 %indvars.iv
  %Aidx = getelementptr inbounds i64, ptr %A, i64 %indvars.iv

  ; A[i+1] = B[i] + 2, stored as i64.
  %b = load i64, ptr %Bidx, align 8
  %a_p1 = add i64 %b, 2
  store i64 %a_p1, ptr %Aidx_next, align 8

  ; A[i] is read back as a pointer, offset by 57 bytes, and written to C[i]
  ; as an integer again.
  %a = load ptr, ptr %Aidx, align 8
  %c = getelementptr i8, ptr %a, i64 57
  %c.i64p = ptrtoint ptr %c to i64
  store i64 %c.i64p, ptr %Cidx, align 8

  %exitcond = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; If the store and the load use different types but have the same size,
; we should still be able to forward the value -- also for vector types.
; Here an i32 is stored and a <2 x half> is loaded.
;
;   i64 i = 0;
;   do {
;     A[i+1] = B[i] + 2;
;     C[i] = bitcast_to_i32(((<2 x half>*)A)[i] * <2.0, 2.0>);
;   } while (++i != N);

define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[LOAD_INITIAL:%.*]] = load <2 x half>, ptr [[A:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[STORE_FORWARDED:%.*]] = phi <2 x half> [ [[LOAD_INITIAL]], [[ENTRY:%.*]] ], [ [[STORE_FORWARD_CAST:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[BIDX]], align 4
; CHECK-NEXT:    [[A_P1:%.*]] = add i32 [[B]], 2
; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half>
; CHECK-NEXT:    store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
; CHECK-NEXT:    [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4
; CHECK-NEXT:    [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000)
; CHECK-NEXT:    [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32
; CHECK-NEXT:    store i32 [[C_INT]], ptr [[CIDX]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  %Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
  %Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
  %Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv

  ; A[i+1] = B[i] + 2, stored as a scalar i32.
  %b = load i32, ptr %Bidx, align 4
  %a_p1 = add i32 %b, 2
  store i32 %a_p1, ptr %Aidx_next, align 4

  ; A[i] is read back as a two-element half vector; the product is bitcast
  ; (not numerically converted) to i32 before being stored to C[i].
  %a = load <2 x half>, ptr %Aidx, align 4
  %c = fmul <2 x half> %a, <half 2.0, half 2.0>
  %c.int = bitcast <2 x half> %c to i32
  store i32 %c.int, ptr %Cidx, align 4

  %exitcond = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Check that we don't forward between integers and actual pointers if the
; sizes don't match. Here an i32 is stored and a ptr is loaded; the
; assertions expect the loop to be left as written (no initial load, no
; phi, no cast).

define void @f5(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[AIDX_NEXT:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT:    [[BIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[CIDX:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[AIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[BIDX]], align 4
; CHECK-NEXT:    [[A_P1:%.*]] = add i32 [[B]], 2
; CHECK-NEXT:    store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
; CHECK-NEXT:    [[A:%.*]] = load ptr, ptr [[AIDX]], align 8
; CHECK-NEXT:    [[C:%.*]] = getelementptr i8, ptr [[A]], i32 57
; CHECK-NEXT:    [[C_I64P:%.*]] = ptrtoint ptr [[C]] to i32
; CHECK-NEXT:    store i32 [[C_I64P]], ptr [[CIDX]], align 4
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  ; Arrays are indexed with an i32 stride, as in @f.
  %Aidx_next = getelementptr inbounds i32, ptr %A, i64 %indvars.iv.next
  %Bidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %Cidx = getelementptr inbounds i32, ptr %C, i64 %indvars.iv
  %Aidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv

  ; A[i+1] = B[i] + 2, stored as i32.
  %b = load i32, ptr %Bidx, align 4
  %a_p1 = add i32 %b, 2
  store i32 %a_p1, ptr %Aidx_next, align 4

  ; The load reads a whole ptr starting at A[i], while the store above wrote
  ; only an i32. Despite its name, %c.i64p holds an i32 in this function.
  %a = load ptr, ptr %Aidx, align 8
  %c = getelementptr i8, ptr %a, i32 57
  %c.i64p = ptrtoint ptr %c to i32
  store i32 %c.i64p, ptr %Cidx, align 4

  %exitcond = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}