; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; SLP vectorizer tests in which several scalar trees share common
; sub-expressions (the same load, multiply, or int-to-fp conversion). The
; pseudo-C snippets above each function describe the scalar IR that follows.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.8.0"

;int test(double *G) {
;  G[0] = 1+G[5]*4;
;  G[1] = 6+G[6]*3;
;  G[2] = 7+G[5]*4;
;  G[3] = 8+G[6]*4;
;}

; Only two elements (G[5], G[6]) are loaded but four results are stored. The
; expected output loads them once as <2 x double>, widens that with a
; shufflevector to <4 x double>, and performs a single vector store.
define i32 @test(ptr nocapture %G) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[G:%.*]], i64 5
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], <double 4.000000e+00, double 3.000000e+00, double 4.000000e+00, double 4.000000e+00>
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], <double 1.000000e+00, double 6.000000e+00, double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[G]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
  %arrayidx = getelementptr inbounds double, ptr %G, i64 5
  %0 = load double, ptr %arrayidx, align 8
  %mul = fmul double %0, 4.000000e+00
  %add = fadd double %mul, 1.000000e+00
  store double %add, ptr %G, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %G, i64 6
  %1 = load double, ptr %arrayidx2, align 8
  %mul3 = fmul double %1, 3.000000e+00
  %add4 = fadd double %mul3, 6.000000e+00
  %arrayidx5 = getelementptr inbounds double, ptr %G, i64 1
  store double %add4, ptr %arrayidx5, align 8
  %add8 = fadd double %mul, 7.000000e+00
  %arrayidx9 = getelementptr inbounds double, ptr %G, i64 2
  store double %add8, ptr %arrayidx9, align 8
  %mul11 = fmul double %1, 4.000000e+00
  %add12 = fadd double %mul11, 8.000000e+00
  %arrayidx13 = getelementptr inbounds double, ptr %G, i64 3
  store double %add12, ptr %arrayidx13, align 8
  ret i32 undef
}

;int foo(double *A, int n) {
;  A[0] = A[0] * 7.9 * n + 6.0;
;  A[1] = A[1] * 7.7 * n + 2.0;
;  A[2] = A[2] * 7.6 * n + 3.0;
;  A[3] = A[3] * 7.4 * n + 4.0;
;}

; All four lanes multiply by the same sitofp(%n). The expected output keeps a
; single scalar conversion and broadcasts it with insertelement +
; shufflevector (zeroinitializer mask).
define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], <double 7.900000e+00, double 7.700000e+00, double 7.600000e+00, double 7.400000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 6.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
; CHECK-NEXT: store <4 x double> [[TMP5]], ptr [[A]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
  %0 = load double, ptr %A, align 8
  %mul = fmul double %0, 7.900000e+00
  %conv = sitofp i32 %n to double
  %mul1 = fmul double %conv, %mul
  %add = fadd double %mul1, 6.000000e+00
  store double %add, ptr %A, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %A, i64 1
  %1 = load double, ptr %arrayidx3, align 8
  %mul4 = fmul double %1, 7.700000e+00
  %mul6 = fmul double %conv, %mul4
  %add7 = fadd double %mul6, 2.000000e+00
  store double %add7, ptr %arrayidx3, align 8
  %arrayidx9 = getelementptr inbounds double, ptr %A, i64 2
  %2 = load double, ptr %arrayidx9, align 8
  %mul10 = fmul double %2, 7.600000e+00
  %mul12 = fmul double %conv, %mul10
  %add13 = fadd double %mul12, 3.000000e+00
  store double %add13, ptr %arrayidx9, align 8
  %arrayidx15 = getelementptr inbounds double, ptr %A, i64 3
  %3 = load double, ptr %arrayidx15, align 8
  %mul16 = fmul double %3, 7.400000e+00
  %mul18 = fmul double %conv, %mul16
  %add19 = fadd double %mul18, 4.000000e+00
  store double %add19, ptr %arrayidx15, align 8
  ret i32 undef
}

; int test2(double *G, int k) {
;   if (k) {
;     G[0] = 1+G[5]*4;
;     G[1] = 6+G[6]*3;
;   } else {
;     G[2] = 7+G[5]*4;
;     G[3] = 8+G[6]*3;
;   }
; }

; We can't merge the gather sequences because one does not dominate the other.
; The G[5]*4 product is computed before the branch and used on both paths; the
; expected output builds a separate insertelement sequence in each successor.

define i32 @test2(ptr nocapture %G, i32 %k) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[K:%.*]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[G:%.*]], i64 5
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = fmul double [[TMP3]], 4.000000e+00
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP12:%.*]], label [[TMP5:%.*]]
; CHECK: 5:
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[G]], i64 6
; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 6.000000e+00>
; CHECK-NEXT: store <2 x double> [[TMP11]], ptr [[G]], align 8
; CHECK-NEXT: br label [[TMP20:%.*]]
; CHECK: 12:
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, ptr [[G]], i64 2
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[G]], i64 6
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP14]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = fmul double [[TMP15]], 3.000000e+00
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x double> poison, double [[TMP4]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> [[TMP17]], double [[TMP16]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = fadd <2 x double> [[TMP18]], <double 7.000000e+00, double 8.000000e+00>
; CHECK-NEXT: store <2 x double> [[TMP19]], ptr [[TMP13]], align 8
; CHECK-NEXT: br label [[TMP20]]
; CHECK: 20:
; CHECK-NEXT: ret i32 undef
;
  %1 = icmp eq i32 %k, 0
  %2 = getelementptr inbounds double, ptr %G, i64 5
  %3 = load double, ptr %2, align 8
  %4 = fmul double %3, 4.000000e+00
  br i1 %1, label %12, label %5

; <label>:5 ; preds = %0
  %6 = fadd double %4, 1.000000e+00
  store double %6, ptr %G, align 8
  %7 = getelementptr inbounds double, ptr %G, i64 6
  %8 = load double, ptr %7, align 8
  %9 = fmul double %8, 3.000000e+00
  %10 = fadd double %9, 6.000000e+00
  %11 = getelementptr inbounds double, ptr %G, i64 1
  store double %10, ptr %11, align 8
  br label %20

; <label>:12 ; preds = %0
  %13 = fadd double %4, 7.000000e+00
  %14 = getelementptr inbounds double, ptr %G, i64 2
  store double %13, ptr %14, align 8
  %15 = getelementptr inbounds double, ptr %G, i64 6
  %16 = load double, ptr %15, align 8
  %17 = fmul double %16, 3.000000e+00
  %18 = fadd double %17, 8.000000e+00
  %19 = getelementptr inbounds double, ptr %G, i64 3
  store double %18, ptr %19, align 8
  br label %20

; <label>:20 ; preds = %12, %5
  ret i32 undef
}


;int foo4(double *A, int n) {
;  A[0] = A[0] * 7.9 * n + 6.0;
;  A[1] = A[1] * 7.9 * n + 6.0;
;  A[2] = A[2] * 7.9 * n + 6.0;
;  A[3] = A[3] * 7.9 * n + 6.0;
;}

; Same shape as @foo, but every lane uses identical constants, so the expected
; output prints the constant operands as splat vectors.
define i32 @foo4(ptr nocapture %A, i32 %n) {
; CHECK-LABEL: @foo4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], splat (double 7.900000e+00)
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], splat (double 6.000000e+00)
; CHECK-NEXT: store <4 x double> [[TMP5]], ptr [[A]], align 8
; CHECK-NEXT: ret i32 undef
;
entry:
  %0 = load double, ptr %A, align 8
  %mul = fmul double %0, 7.900000e+00
  %conv = sitofp i32 %n to double
  %mul1 = fmul double %conv, %mul
  %add = fadd double %mul1, 6.000000e+00
  store double %add, ptr %A, align 8
  %arrayidx3 = getelementptr inbounds double, ptr %A, i64 1
  %1 = load double, ptr %arrayidx3, align 8
  %mul4 = fmul double %1, 7.900000e+00
  %mul6 = fmul double %conv, %mul4
  %add7 = fadd double %mul6, 6.000000e+00
  store double %add7, ptr %arrayidx3, align 8
  %arrayidx9 = getelementptr inbounds double, ptr %A, i64 2
  %2 = load double, ptr %arrayidx9, align 8
  %mul10 = fmul double %2, 7.900000e+00
  %mul12 = fmul double %conv, %mul10
  %add13 = fadd double %mul12, 6.000000e+00
  store double %add13, ptr %arrayidx9, align 8
  %arrayidx15 = getelementptr inbounds double, ptr %A, i64 3
  %3 = load double, ptr %arrayidx15, align 8
  %mul16 = fmul double %3, 7.900000e+00
  %mul18 = fmul double %conv, %mul16
  %add19 = fadd double %mul18, 6.000000e+00
  store double %add19, ptr %arrayidx15, align 8
  ret i32 undef
}

;int partial_mrg(double *A, int n) {
;  A[0] = A[0] * n;
;  A[1] = A[1] * n;
;  if (n < 4) return 0;
;  A[2] = A[2] * n;
;  A[3] = A[3] * (n+4);
;}

; The entry block and if.end each form a <2 x double> tree whose lane 0 is
; sitofp(%n). In the expected output the if.end vector is built by inserting
; lane 1 into the insertelement already created in the entry block, rather
; than starting a fresh sequence from poison.
define i32 @partial_mrg(ptr nocapture %A, i32 %n) {
; CHECK-LABEL: @partial_mrg(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[CONV]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP0]]
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[A]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], 4
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 2
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[N]], 4
; CHECK-NEXT: [[CONV12:%.*]] = sitofp i32 [[ADD]] to double
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[ARRAYIDX7]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP1]], double [[CONV12]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP4]]
; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[ARRAYIDX7]], align 8
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret i32 0
;
entry:
  %0 = load double, ptr %A, align 8
  %conv = sitofp i32 %n to double
  %mul = fmul double %conv, %0
  store double %mul, ptr %A, align 8
  %arrayidx2 = getelementptr inbounds double, ptr %A, i64 1
  %1 = load double, ptr %arrayidx2, align 8
  %mul4 = fmul double %conv, %1
  store double %mul4, ptr %arrayidx2, align 8
  %cmp = icmp slt i32 %n, 4
  br i1 %cmp, label %return, label %if.end

if.end: ; preds = %entry
  %arrayidx7 = getelementptr inbounds double, ptr %A, i64 2
  %2 = load double, ptr %arrayidx7, align 8
  %mul9 = fmul double %conv, %2
  store double %mul9, ptr %arrayidx7, align 8
  %arrayidx11 = getelementptr inbounds double, ptr %A, i64 3
  %3 = load double, ptr %arrayidx11, align 8
  %add = add nsw i32 %n, 4
  %conv12 = sitofp i32 %add to double
  %mul13 = fmul double %conv12, %3
  store double %mul13, ptr %arrayidx11, align 8
  br label %return

return: ; preds = %entry, %if.end
  ret i32 0
}

%class.B.53.55 = type { %class.A.52.54, double }
%class.A.52.54 = type { double, double, double }

@a = external global double, align 8

; Regression test (the name presumably refers to LLVM bug report 19646 --
; TODO confirm). The values feeding the phis in if.end13 are defined in
; sw.epilog7, a block with no predecessors. The expected output is the input
; IR unchanged, i.e. nothing in this function is vectorized.
define void @PR19646(ptr %this, i1 %arg) {
; CHECK-LABEL: @PR19646(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %arg, label [[IF_END13:%.*]], label [[IF_END13]]
; CHECK: sw.epilog7:
; CHECK-NEXT: [[DOTIN:%.*]] = getelementptr inbounds [[CLASS_B_53_55:%.*]], ptr [[THIS:%.*]], i64 0, i32 0, i32 1
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[DOTIN]], align 8
; CHECK-NEXT: [[ADD:%.*]] = fadd double undef, 0.000000e+00
; CHECK-NEXT: [[ADD6:%.*]] = fadd double [[ADD]], [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr @a, align 8
; CHECK-NEXT: [[ADD8:%.*]] = fadd double [[TMP1]], 0.000000e+00
; CHECK-NEXT: [[_DY:%.*]] = getelementptr inbounds [[CLASS_B_53_55]], ptr [[THIS]], i64 0, i32 0, i32 2
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[_DY]], align 8
; CHECK-NEXT: [[ADD10:%.*]] = fadd double [[ADD8]], [[TMP2]]
; CHECK-NEXT: br i1 %arg, label [[IF_THEN12:%.*]], label [[IF_END13]]
; CHECK: if.then12:
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8
; CHECK-NEXT: br label [[IF_END13]]
; CHECK: if.end13:
; CHECK-NEXT: [[X_1:%.*]] = phi double [ 0.000000e+00, [[IF_THEN12]] ], [ [[ADD6]], [[SW_EPILOG7:%.*]] ], [ undef, [[ENTRY:%.*]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: [[B_0:%.*]] = phi double [ [[TMP3]], [[IF_THEN12]] ], [ [[ADD10]], [[SW_EPILOG7]] ], [ undef, [[ENTRY]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT: unreachable
;
entry:
  br i1 %arg, label %if.end13, label %if.end13

sw.epilog7: ; No predecessors!
  %.in = getelementptr inbounds %class.B.53.55, ptr %this, i64 0, i32 0, i32 1
  %0 = load double, ptr %.in, align 8
  %add = fadd double undef, 0.000000e+00
  %add6 = fadd double %add, %0
  %1 = load double, ptr @a, align 8
  %add8 = fadd double %1, 0.000000e+00
  %_dy = getelementptr inbounds %class.B.53.55, ptr %this, i64 0, i32 0, i32 2
  %2 = load double, ptr %_dy, align 8
  %add10 = fadd double %add8, %2
  br i1 %arg, label %if.then12, label %if.end13

if.then12: ; preds = %sw.epilog7
  %3 = load double, ptr undef, align 8
  br label %if.end13

if.end13: ; preds = %if.then12, %sw.epilog7, %entry
  %x.1 = phi double [ 0.000000e+00, %if.then12 ], [ %add6, %sw.epilog7 ], [ undef, %entry ], [ undef, %entry ]
  %b.0 = phi double [ %3, %if.then12 ], [ %add10, %sw.epilog7 ], [ undef, %entry], [ undef, %entry ]
  unreachable
}

; The two adjacent stores to %dst[0] and %dst[1] are fed by duplicated
; `or 22, %a` / `or ..., 3` chains inside the loop. In the expected output the
; broadcast of %a (insertelement + shufflevector) sits in the entry block ahead
; of the loop, and the loop body holds a single <2 x i32> or/or/store sequence.
; The stores to %dst[10] and %dst[11] stay scalar.
define void @cse_for_hoisted_instructions_in_preheader(ptr %dst, i32 %a, i1 %c) {
; CHECK-LABEL: @cse_for_hoisted_instructions_in_preheader(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> splat (i32 22), [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP2]], splat (i32 3)
; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[A]], 3
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 10
; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_2]], align 4
; CHECK-NEXT: [[OR_3:%.*]] = or i32 [[A]], 3
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 11
; CHECK-NEXT: store i32 [[OR_3]], ptr [[GEP_3]], align 4
; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %or.a = or i32 22, %a
  %or.0 = or i32 %or.a, 3
  store i32 %or.0, ptr %dst
  %or.a.2 = or i32 22, %a
  %or.1 = or i32 %or.a.2, 3
  %gep.1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %or.1, ptr %gep.1
  %or.2 = or i32 %a, 3
  %gep.2 = getelementptr inbounds i32, ptr %dst, i64 10
  store i32 %or.2, ptr %gep.2
  %or.3 = or i32 %a, 3
  %gep.3 = getelementptr inbounds i32, ptr %dst, i64 11
  store i32 %or.3, ptr %gep.3
  br i1 %c, label %loop, label %exit

exit:
  ret void
}