; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=dse -enable-dse-partial-store-merging=false < %s | FileCheck --check-prefixes=CHECK %s

; Dead-store-elimination tests in which one wide store is overwritten by a
; combination of later, narrower stores. Partial store merging is switched off
; on the command line above, so the narrower stores are expected to remain
; separate in the output.

; NOTE(review): the datalayout string starts with "E" (big-endian) while the
; triple names powerpc64le; left unchanged here -- confirm this is intentional.
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux"

%"struct.std::complex" = type { { float, float } }

; The i64 store to %agg.result is followed by two float stores to the same
; object, one through %agg.result itself and one through the GEP to the second
; float field. The assertions contain no i64 store, i.e. it is expected to be
; eliminated.
define void @_Z4testSt7complexIfE(ptr noalias nocapture sret(%"struct.std::complex") %agg.result, i64 %c.coerce) {
; CHECK-LABEL: @_Z4testSt7complexIfE(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[REF_TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[C_COERCE:%.*]], 32
; CHECK-NEXT:    [[C_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_SROA_0_0_EXTRACT_SHIFT]] to i32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32 [[C_SROA_0_0_EXTRACT_TRUNC]] to float
; CHECK-NEXT:    [[C_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[C_COERCE]] to i32
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[C_SROA_2_0_EXTRACT_TRUNC]] to float
; CHECK-NEXT:    call void @_Z3barSt7complexIfE(ptr nonnull sret(%"struct.std::complex") [[REF_TMP]], i64 [[C_COERCE]])
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[REF_TMP]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP2]], 32
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT:    [[_M_VALUE_IMAGP_I_I:%.*]] = getelementptr inbounds %"struct.std::complex", ptr [[AGG_RESULT:%.*]], i64 0, i32 0, i32 1
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
; CHECK-NEXT:    [[MUL_AD_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP1]]
; CHECK-NEXT:    [[MUL_BC_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP0]]
; CHECK-NEXT:    [[MUL_I_I_I:%.*]] = fadd fast float [[MUL_AD_I_I]], [[MUL_BC_I_I]]
; CHECK-NEXT:    [[MUL_AC_I_I:%.*]] = fmul fast float [[TMP5]], [[TMP0]]
; CHECK-NEXT:    [[MUL_BD_I_I:%.*]] = fmul fast float [[TMP7]], [[TMP1]]
; CHECK-NEXT:    [[MUL_R_I_I:%.*]] = fsub fast float [[MUL_AC_I_I]], [[MUL_BD_I_I]]
; CHECK-NEXT:    store float [[MUL_R_I_I]], ptr [[AGG_RESULT]], align 4
; CHECK-NEXT:    store float [[MUL_I_I_I]], ptr [[_M_VALUE_IMAGP_I_I]], align 4
; CHECK-NEXT:    ret void
;
entry:

  %ref.tmp = alloca i64, align 8
  %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
  %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
  %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
  %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
  %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
  call void @_Z3barSt7complexIfE(ptr nonnull sret(%"struct.std::complex") %ref.tmp, i64 %c.coerce)
  %2 = load i64, ptr %ref.tmp, align 8
  store i64 %2, ptr %agg.result, align 4

  %3 = lshr i64 %2, 32
  %4 = trunc i64 %3 to i32
  %5 = bitcast i32 %4 to float
  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", ptr %agg.result, i64 0, i32 0, i32 1
  %6 = trunc i64 %2 to i32
  %7 = bitcast i32 %6 to float
  %mul_ad.i.i = fmul fast float %5, %1
  %mul_bc.i.i = fmul fast float %7, %0
  %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
  %mul_ac.i.i = fmul fast float %5, %0
  %mul_bd.i.i = fmul fast float %7, %1
  %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
  store float %mul_r.i.i, ptr %agg.result, align 4
  store float %mul_i.i.i, ptr %_M_value.imagp.i.i, align 4
  ret void
}

declare void @_Z3barSt7complexIfE(ptr sret(%"struct.std::complex"), i64)

; Mixed-width overwrites of one 4-byte slot. Only three stores survive in the
; assertions: the i16 at offset 0, the i8 at offset 3 and the i16 at offset 1.
; The initial i32 store, the i8 store at offset 0 and the i8 store at offset 2
; (together with its now-unused GEP) are expected to disappear.
define void @test1(ptr %ptr) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i16 -30062, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT:    store i8 47, ptr [[BPTR3]], align 1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    store i16 2020, ptr [[BPTR1]], align 1
; CHECK-NEXT:    ret void
;
entry:

  store i32 5, ptr %ptr
  store i8 7, ptr %ptr
  store i16 -30062, ptr %ptr
  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
  store i8 25, ptr %bptr2
  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3
  store i8 47, ptr %bptr3
  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
  store i16 2020, ptr %bptr1, align 1
  ret void
}

; The i32 store covers bytes 0-3; the five i16 stores at byte offsets -1, 0, 1,
; 2 and 3 overlap each other and together cover all of those bytes, so the i32
; store is expected to be removed.
define void @test2(ptr %ptr) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 -1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT:    store i16 1456, ptr [[BPTRM1]], align 1
; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
; CHECK-NEXT:    store i16 1756, ptr [[BPTR1]], align 1
; CHECK-NEXT:    store i16 1126, ptr [[BPTR2]], align 1
; CHECK-NEXT:    store i16 5656, ptr [[BPTR3]], align 1
; CHECK-NEXT:    ret void
;
entry:

  store i32 5, ptr %ptr

  %bptrm1 = getelementptr inbounds i8, ptr %ptr, i64 -1
  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3

  store i16 1456, ptr %bptrm1, align 1
  store i16 1346, ptr %ptr, align 1
  store i16 1756, ptr %bptr1, align 1
  store i16 1126, ptr %bptr2, align 1
  store i16 5656, ptr %bptr3, align 1

  ret void
}

; Same store pattern as @test2, but an i8 load from %ptr sits between the i32
; store and the overwriting i16 stores. The assertions keep the i32 store.
define signext i8 @test3(ptr %ptr) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store i32 5, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    [[BPTRM1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 -1
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[PTR]], align 1
; CHECK-NEXT:    store i16 1456, ptr [[BPTRM1]], align 1
; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
; CHECK-NEXT:    store i16 1756, ptr [[BPTR1]], align 1
; CHECK-NEXT:    store i16 1126, ptr [[BPTR2]], align 1
; CHECK-NEXT:    store i16 5656, ptr [[BPTR3]], align 1
; CHECK-NEXT:    ret i8 [[V]]
;
entry:

  store i32 5, ptr %ptr

  %bptrm1 = getelementptr inbounds i8, ptr %ptr, i64 -1
  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3

  %v = load i8, ptr %ptr, align 1
  store i16 1456, ptr %bptrm1, align 1
  store i16 1346, ptr %ptr, align 1
  store i16 1756, ptr %bptr1, align 1
  store i16 1126, ptr %bptr2, align 1
  store i16 5656, ptr %bptr3, align 1

  ret i8 %v
}

%struct.foostruct = type {
ptr,
ptr,
ptr,
ptr,
ptr
}
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)
declare void @goFunc(ptr)
declare i32 @fa(ptr, ptr, i32, i8, ptr)

; A 40-byte memset of the alloca is followed by one pointer store to each of
; the five fields of %struct.foostruct. The assertions below go straight from
; the alloca to the first store, i.e. the memset is expected to be removed.
; NOTE(review): an earlier comment here said this case was missed because of an
; aggressive partial-overlap analysis limit; that no longer matches the
; assertions -- confirm against current opt output.
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BANG:%.*]] = alloca [[STRUCT_FOOSTRUCT:%.*]], align 8
; CHECK-NEXT:    store ptr @fa, ptr [[BANG]], align 8
; CHECK-NEXT:    [[V3:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 1
; CHECK-NEXT:    store ptr @fa, ptr [[V3]], align 8
; CHECK-NEXT:    [[V4:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 2
; CHECK-NEXT:    store ptr @fa, ptr [[V4]], align 8
; CHECK-NEXT:    [[V5:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 3
; CHECK-NEXT:    store ptr @fa, ptr [[V5]], align 8
; CHECK-NEXT:    [[V6:%.*]] = getelementptr inbounds [[STRUCT_FOOSTRUCT]], ptr [[BANG]], i64 0, i32 4
; CHECK-NEXT:    store ptr null, ptr [[V6]], align 8
; CHECK-NEXT:    call void @goFunc(ptr [[BANG]])
; CHECK-NEXT:    ret void
;
entry:

  %bang = alloca %struct.foostruct, align 8
  call void @llvm.memset.p0.i64(ptr align 8 %bang, i8 0, i64 40, i1 false)
  store ptr @fa, ptr %bang, align 8
  %v3 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 1
  store ptr @fa, ptr %v3, align 8
  %v4 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 2
  store ptr @fa, ptr %v4, align 8
  %v5 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 3
  store ptr @fa, ptr %v5, align 8
  %v6 = getelementptr inbounds %struct.foostruct, ptr %bang, i64 0, i32 4
  store ptr null, ptr %v6, align 8
  call void @goFunc(ptr %bang)
  ret void
}

; Three overlapping i16 stores at byte offsets 2, 1 and 0 (issued from the
; highest offset down) cover bytes 0-3, so the i32 store is expected to be
; removed. %bptr3 is never stored through but still appears in the assertions.
define signext i8 @test5(ptr %ptr) {
; CHECK-LABEL: @test5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR1:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3
; CHECK-NEXT:    store i16 -1, ptr [[BPTR2]], align 1
; CHECK-NEXT:    store i16 1456, ptr [[BPTR1]], align 1
; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:

  store i32 0, ptr %ptr

  %bptr1 = getelementptr inbounds i8, ptr %ptr, i64 1
  %bptr2 = getelementptr inbounds i8, ptr %ptr, i64 2
  %bptr3 = getelementptr inbounds i8, ptr %ptr, i64 3

  store i16 65535, ptr %bptr2, align 1
  store i16 1456, ptr %bptr1, align 1
  store i16 1346, ptr %ptr, align 1

  ret i8 0
}

; Two non-overlapping i16 stores (i16 element 1, then element 0) exactly tile
; the four bytes of the i32 store, which is expected to be removed.
define signext i8 @test6(ptr %ptr) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
; CHECK-NEXT:    store i16 1456, ptr [[BPTR2]], align 1
; CHECK-NEXT:    store i16 -1, ptr [[PTR]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:

  store i32 0, ptr %ptr

  %bptr2 = getelementptr inbounds i16, ptr %ptr, i64 1

  store i16 1456, ptr %bptr2, align 1
  store i16 65535, ptr %ptr, align 1

  ret i8 0
}

; Four non-overlapping i16 stores to i16 elements 0, 2, 1 and 3 (in that
; order) tile the eight bytes of the i64 store, which is expected to be removed.
define signext i8 @test7(ptr %ptr) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[BPTR2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 1
; CHECK-NEXT:    [[BPTR3:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 2
; CHECK-NEXT:    [[BPTR4:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i64 3
; CHECK-NEXT:    store i16 1346, ptr [[PTR]], align 1
; CHECK-NEXT:    store i16 1756, ptr [[BPTR3]], align 1
; CHECK-NEXT:    store i16 1456, ptr [[BPTR2]], align 1
; CHECK-NEXT:    store i16 5656, ptr [[BPTR4]], align 1
; CHECK-NEXT:    ret i8 0
;
entry:

  store i64 0, ptr %ptr

  %bptr2 = getelementptr inbounds i16, ptr %ptr, i64 1
  %bptr3 = getelementptr inbounds i16, ptr %ptr, i64 2
  %bptr4 = getelementptr inbounds i16, ptr %ptr, i64 3

  store i16 1346, ptr %ptr, align 1
  store i16 1756, ptr %bptr3, align 1
  store i16 1456, ptr %bptr2, align 1
  store i16 5656, ptr %bptr4, align 1

  ret i8 0
}