; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s

define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_by_constant_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
; CHECK-NEXT:    store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT:    store i32 [[MUL_1]], ptr [[DST_1]], align 4
; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
  %l.src.0 = load i32, ptr %gep.src.0, align 4
  %mul.0 = mul nsw i32 %l.src.0, 10

  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
  %l.src.1 = load i32, ptr %gep.src.1, align 4
  %mul.1 = mul nsw i32 %l.src.1, 10

  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
  %l.src.2 = load i32, ptr %gep.src.2, align 4
  %mul.2 = mul nsw i32 %l.src.2, 10

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}

define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
; CHECK-NEXT:    [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
; CHECK-NEXT:    [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
; CHECK-NEXT:    [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
; CHECK-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; CHECK-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; CHECK-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; CHECK-NEXT:    store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT:    store i32 [[MUL_1]], ptr [[DST_1]], align 4
; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}

define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
; CHECK-LABEL: @v3_load_i32_mul_add_const_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_1_0:%.*]] = load i32, ptr [[GEP_SRC_1_0]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_2_0:%.*]] = load i32, ptr [[GEP_SRC_2_0]], align 4
; CHECK-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_1_0]], [[L_SRC_2_0]]
; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_0]], 9
; CHECK-NEXT:    [[GEP_SRC_1_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 1
; CHECK-NEXT:    [[L_SRC_1_1:%.*]] = load i32, ptr [[GEP_SRC_1_1]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 1
; CHECK-NEXT:    [[L_SRC_2_1:%.*]] = load i32, ptr [[GEP_SRC_2_1]], align 4
; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1_1]], [[L_SRC_2_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[MUL_1]], 9
; CHECK-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; CHECK-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; CHECK-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; CHECK-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; CHECK-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
; CHECK-NEXT:    store i32 [[ADD_0]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; CHECK-NEXT:    store i32 [[ADD_1]], ptr [[DST_1]], align 4
; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[DST_2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
  %add.0 = add i32 %mul.0, 9

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
  %add.1 = add i32 %mul.1, 9

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
  %add.2 = add i32 %mul.2, 9

  store i32 %add.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %add.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %add.2, ptr %dst.2

  ret void
}

define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
  %l.src.0 = load float, ptr %gep.src.0, align 4
  %fadd.0 = fadd float %l.src.0, 10.0

  %gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
  %l.src.1 = load float, ptr %gep.src.1, align 4
  %fadd.1 = fadd float %l.src.1, 10.0

  %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
  %l.src.2 = load float, ptr %gep.src.2, align 4
  %fadd.2 = fadd float %l.src.2, 10.0

  store float %fadd.0, ptr %dst

  %dst.1 = getelementptr float, ptr %dst, i32 1
  store float %fadd.1, ptr %dst.1

  %dst.2 = getelementptr float, ptr %dst, i32 2
  store float %fadd.2, ptr %dst.2

  ret void
}

define void @phi_store3(ptr %dst) {
; CHECK-LABEL: @phi_store3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[EXIT:%.*]]
; CHECK:       invoke.cont8.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
; CHECK-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
; CHECK-NEXT:    store <2 x i32> [[TMP0]], ptr [[DST]], align 4
; CHECK-NEXT:    store i32 [[P_2]], ptr [[DST_2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  br label %exit

invoke.cont8.loopexit:                            ; No predecessors!
  br label %exit

exit:
  %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  %dst.2 = getelementptr i32, ptr %dst, i32 2

  store i32 %p.0, ptr %dst, align 4
  store i32 %p.1, ptr %dst.1, align 4
  store i32 %p.2, ptr %dst.2, align 4
  ret void
}

define void @store_try_reorder(ptr %dst) {
; CHECK-LABEL: @store_try_reorder(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ADD:%.*]] = add i32 0, 0
; CHECK-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
; CHECK-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %add = add i32 0, 0
  store i32 %add, ptr %dst, align 4
  %add207 = sub i32 0, 0
  %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
  store i32 %add207, ptr %arrayidx.i1887, align 4
  %add216 = sub i32 0, 0
  %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
  store i32 %add216, ptr %arrayidx.i1891, align 4
  ret void
}

define void @vec3_fpext_cost(ptr %Colour, float %0) {
; CHECK-LABEL: @vec3_fpext_cost(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; CHECK-NEXT:    [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
; CHECK-NEXT:    [[CONV78:%.*]] = fpext float [[TMP0]] to double
; CHECK-NEXT:    [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
; CHECK-NEXT:    [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
; CHECK-NEXT:    store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx72 = getelementptr float, ptr %Colour, i64 1
  %arrayidx80 = getelementptr float, ptr %Colour, i64 2
  %conv62 = fpext float %0 to double
  %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
  %conv66 = fptrunc double %1 to float
  store float %conv66, ptr %Colour, align 4
  %conv70 = fpext float %0 to double
  %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
  %conv74 = fptrunc double %2 to float
  store float %conv74, ptr %arrayidx72, align 4
  %conv78 = fpext float %0 to double
  %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
  %conv82 = fptrunc double %3 to float
  store float %conv82, ptr %arrayidx80, align 4
  ret void
}

define void @fpext_gather(ptr %dst, double %conv) {
; CHECK-LABEL: @fpext_gather(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[CONV:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
; CHECK-NEXT:    [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT:    store float [[TMP3]], ptr [[LENGTHS]], align 4
; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %conv25 = fptrunc double %conv to float
  %Lengths = getelementptr float, ptr %dst, i64 0
  store float %conv25, ptr %Lengths, align 4
  %arrayidx32 = getelementptr float, ptr %dst, i64 1
  store float %conv25, ptr %arrayidx32, align 4
  %conv34 = fptrunc double %conv to float
  %arrayidx37 = getelementptr float, ptr %dst, i64 2
  store float %conv34, ptr %arrayidx37, align 4
  ret void
}

declare float @llvm.fmuladd.f32(float, float, float)

declare double @llvm.fmuladd.f64(double, double, double)