; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=POW2-ONLY %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

; Seven scalar fmuladd results, all sharing %0 as the multiplicand, are stored
; to seven consecutive float slots at %B+4 .. %B+28. Four of the multiplier /
; addend pairs are constants; the remaining three lanes use scalar loads from
; %A+28 (used by two lanes), %A+12 and %B+0, so those loaded values have to be
; inserted into the vector operands.
;
; Expected output of the first opt invocation (non-power-of-2 vectorization
; enabled): a single <7 x float> fmuladd followed by one <7 x float> store to
; %B+4.
;
; Expected output of the second opt invocation (non-power-of-2 vectorization
; disabled): a <4 x float> fmuladd stored to %B+4, a <2 x float> fmuladd stored
; to %B+20, and the remaining scalar fmuladd stored to %B+28.
define void @test_insert_loads(ptr %A, ptr noalias %B, float %0) #0 {
; NON-POW2-LABEL: define void @test_insert_loads(
; NON-POW2-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[A_28:%.*]] = getelementptr i8, ptr [[A]], i64 28
; NON-POW2-NEXT:    [[L_A_28:%.*]] = load float, ptr [[A_28]], align 4
; NON-POW2-NEXT:    [[A_12:%.*]] = getelementptr i8, ptr [[A]], i64 12
; NON-POW2-NEXT:    [[L_A_12:%.*]] = load float, ptr [[A_12]], align 4
; NON-POW2-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[B]], i64 4
; NON-POW2-NEXT:    [[L_B_0:%.*]] = load float, ptr [[B]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = insertelement <7 x float> poison, float [[TMP0]], i32 0
; NON-POW2-NEXT:    [[TMP2:%.*]] = shufflevector <7 x float> [[TMP1]], <7 x float> poison, <7 x i32> zeroinitializer
; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <7 x float> <float poison, float poison, float poison, float 4.000000e+00, float 3.000000e+00, float 2.000000e+00, float 1.000000e+00>, float [[L_A_12]], i32 0
; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <7 x float> [[TMP3]], float [[L_A_28]], i32 1
; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <7 x float> [[TMP4]], <7 x float> poison, <7 x i32> <i32 0, i32 1, i32 1, i32 3, i32 4, i32 5, i32 6>
; NON-POW2-NEXT:    [[TMP6:%.*]] = insertelement <7 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 4.000000e+00, float 3.000000e+00, float 2.000000e+00, float 1.000000e+00>, float [[L_B_0]], i32 0
; NON-POW2-NEXT:    [[TMP7:%.*]] = call <7 x float> @llvm.fmuladd.v7f32(<7 x float> [[TMP2]], <7 x float> [[TMP5]], <7 x float> [[TMP6]])
; NON-POW2-NEXT:    store <7 x float> [[TMP7]], ptr [[GEP_4]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: define void @test_insert_loads(
; POW2-ONLY-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[MULADD_0:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 1.000000e+00, float 1.000000e+00)
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> <float 3.000000e+00, float 2.000000e+00>, <2 x float> <float 3.000000e+00, float 2.000000e+00>)
; POW2-ONLY-NEXT:    [[A_28:%.*]] = getelementptr i8, ptr [[A]], i64 28
; POW2-ONLY-NEXT:    [[L_A_28:%.*]] = load float, ptr [[A_28]], align 4
; POW2-ONLY-NEXT:    [[A_12:%.*]] = getelementptr i8, ptr [[A]], i64 12
; POW2-ONLY-NEXT:    [[L_A_12:%.*]] = load float, ptr [[A_12]], align 4
; POW2-ONLY-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[B]], i64 4
; POW2-ONLY-NEXT:    [[L_B_0:%.*]] = load float, ptr [[B]], align 4
; POW2-ONLY-NEXT:    [[GEP_28:%.*]] = getelementptr i8, ptr [[B]], i64 28
; POW2-ONLY-NEXT:    [[GEP_20:%.*]] = getelementptr i8, ptr [[B]], i64 20
; POW2-ONLY-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
; POW2-ONLY-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer
; POW2-ONLY-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 4.000000e+00>, float [[L_A_12]], i32 0
; POW2-ONLY-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[L_A_28]], i32 1
; POW2-ONLY-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 3>
; POW2-ONLY-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 4.000000e+00>, float [[L_B_0]], i32 0
; POW2-ONLY-NEXT:    [[TMP10:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x float> [[TMP9]])
; POW2-ONLY-NEXT:    store <4 x float> [[TMP10]], ptr [[GEP_4]], align 4
; POW2-ONLY-NEXT:    store <2 x float> [[TMP3]], ptr [[GEP_20]], align 4
; POW2-ONLY-NEXT:    store float [[MULADD_0]], ptr [[GEP_28]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %muladd.0 = tail call float @llvm.fmuladd.f32(float %0, float 1.000000e+00, float 1.000000e+00)
  %muladd.1 = tail call float @llvm.fmuladd.f32(float %0, float 2.000000e+00, float 2.000000e+00)
  %muladd.2 = tail call float @llvm.fmuladd.f32(float %0, float 3.000000e+00, float 3.000000e+00)
  %muladd.3 = tail call float @llvm.fmuladd.f32(float %0, float 4.000000e+00, float 4.000000e+00)
  %A.28 = getelementptr i8, ptr %A, i64 28
  %l.A.28 = load float, ptr %A.28, align 4
  %muladd.4 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00)
  %muladd.5 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00)
  %A.12 = getelementptr i8, ptr %A, i64 12
  %l.A.12 = load float, ptr %A.12, align 4
  %gep.4 = getelementptr i8, ptr %B, i64 4
  %gep.12 = getelementptr i8, ptr %B, i64 12
  %l.B.0 = load float, ptr %B, align 4
  %muladd.6 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.12, float %l.B.0)
  %gep.28 = getelementptr i8, ptr %B, i64 28
  %gep.24 = getelementptr i8, ptr %B, i64 24
  %gep.20 = getelementptr i8, ptr %B, i64 20
  %gep.16 = getelementptr i8, ptr %B, i64 16
  %gep.8 = getelementptr i8, ptr %B, i64 8
  store float %muladd.6, ptr %gep.4, align 4
  store float %muladd.5, ptr %gep.8, align 8
  store float %muladd.4, ptr %gep.12, align 4
  store float %muladd.3, ptr %gep.16, align 16
  store float %muladd.2, ptr %gep.20, align 4
  store float %muladd.1, ptr %gep.24, align 8
  store float %muladd.0, ptr %gep.28, align 4
  ret void
}

declare float @llvm.fmuladd.f32(float, float, float)

attributes #0 = { "target-cpu"="skylake-avx512" }