xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/vec3-gather-some-loads.ll (revision 6d66db3890a18e3926a49cbfeb28e99c464cfcd5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=NON-POW2 %s
3; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=x86_64-apple-macosx -S %s | FileCheck --check-prefixes=POW2-ONLY %s
4
; Module-level data layout string; both RUN lines additionally pass
; -mtriple=x86_64-apple-macosx on the opt command line.
5target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
6
; Seven scalar llvm.fmuladd.f32 calls, all using %0 as their first operand,
; whose results are stored to the seven consecutive float slots at byte
; offsets 4, 8, ..., 28 of %B. Four of the calls use only constants for the
; remaining operands, two reuse a single load from %A + 28, and one combines
; a load from %A + 12 with a load from %B + 0.
;
; Expected output recorded by the autogenerated assertions below:
;  * with -slp-vectorize-non-power-of-2: a single <7 x float> fmuladd and a
;    single <7 x float> store to %B + 4;
;  * with -slp-vectorize-non-power-of-2=false: a <4 x float> fmuladd stored to
;    %B + 4, a <2 x float> fmuladd stored to %B + 20, and one remaining scalar
;    fmuladd stored to %B + 28.
; In both configurations the three loads stay scalar and are inserted into the
; vector operands element by element.
7define void @test_insert_loads(ptr %A, ptr noalias %B, float %0) #0 {
8; NON-POW2-LABEL: define void @test_insert_loads(
9; NON-POW2-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
10; NON-POW2-NEXT:  entry:
11; NON-POW2-NEXT:    [[A_28:%.*]] = getelementptr i8, ptr [[A]], i64 28
12; NON-POW2-NEXT:    [[L_A_28:%.*]] = load float, ptr [[A_28]], align 4
13; NON-POW2-NEXT:    [[A_12:%.*]] = getelementptr i8, ptr [[A]], i64 12
14; NON-POW2-NEXT:    [[L_A_12:%.*]] = load float, ptr [[A_12]], align 4
15; NON-POW2-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[B]], i64 4
16; NON-POW2-NEXT:    [[L_B_0:%.*]] = load float, ptr [[B]], align 4
17; NON-POW2-NEXT:    [[TMP1:%.*]] = insertelement <7 x float> poison, float [[TMP0]], i32 0
18; NON-POW2-NEXT:    [[TMP2:%.*]] = shufflevector <7 x float> [[TMP1]], <7 x float> poison, <7 x i32> zeroinitializer
19; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <7 x float> <float poison, float poison, float poison, float 4.000000e+00, float 3.000000e+00, float 2.000000e+00, float 1.000000e+00>, float [[L_A_12]], i32 0
20; NON-POW2-NEXT:    [[TMP4:%.*]] = insertelement <7 x float> [[TMP3]], float [[L_A_28]], i32 1
21; NON-POW2-NEXT:    [[TMP5:%.*]] = shufflevector <7 x float> [[TMP4]], <7 x float> poison, <7 x i32> <i32 0, i32 1, i32 1, i32 3, i32 4, i32 5, i32 6>
22; NON-POW2-NEXT:    [[TMP6:%.*]] = insertelement <7 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 4.000000e+00, float 3.000000e+00, float 2.000000e+00, float 1.000000e+00>, float [[L_B_0]], i32 0
23; NON-POW2-NEXT:    [[TMP7:%.*]] = call <7 x float> @llvm.fmuladd.v7f32(<7 x float> [[TMP2]], <7 x float> [[TMP5]], <7 x float> [[TMP6]])
24; NON-POW2-NEXT:    store <7 x float> [[TMP7]], ptr [[GEP_4]], align 4
25; NON-POW2-NEXT:    ret void
26;
27; POW2-ONLY-LABEL: define void @test_insert_loads(
28; POW2-ONLY-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
29; POW2-ONLY-NEXT:  entry:
30; POW2-ONLY-NEXT:    [[MULADD_0:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 1.000000e+00, float 1.000000e+00)
31; POW2-ONLY-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
32; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
33; POW2-ONLY-NEXT:    [[TMP3:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> <float 3.000000e+00, float 2.000000e+00>, <2 x float> <float 3.000000e+00, float 2.000000e+00>)
34; POW2-ONLY-NEXT:    [[A_28:%.*]] = getelementptr i8, ptr [[A]], i64 28
35; POW2-ONLY-NEXT:    [[L_A_28:%.*]] = load float, ptr [[A_28]], align 4
36; POW2-ONLY-NEXT:    [[A_12:%.*]] = getelementptr i8, ptr [[A]], i64 12
37; POW2-ONLY-NEXT:    [[L_A_12:%.*]] = load float, ptr [[A_12]], align 4
38; POW2-ONLY-NEXT:    [[GEP_4:%.*]] = getelementptr i8, ptr [[B]], i64 4
39; POW2-ONLY-NEXT:    [[L_B_0:%.*]] = load float, ptr [[B]], align 4
40; POW2-ONLY-NEXT:    [[GEP_28:%.*]] = getelementptr i8, ptr [[B]], i64 28
41; POW2-ONLY-NEXT:    [[GEP_20:%.*]] = getelementptr i8, ptr [[B]], i64 20
42; POW2-ONLY-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
43; POW2-ONLY-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> zeroinitializer
44; POW2-ONLY-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 4.000000e+00>, float [[L_A_12]], i32 0
45; POW2-ONLY-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[L_A_28]], i32 1
46; POW2-ONLY-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 3>
47; POW2-ONLY-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 4.000000e+00>, float [[L_B_0]], i32 0
48; POW2-ONLY-NEXT:    [[TMP10:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP5]], <4 x float> [[TMP8]], <4 x float> [[TMP9]])
49; POW2-ONLY-NEXT:    store <4 x float> [[TMP10]], ptr [[GEP_4]], align 4
50; POW2-ONLY-NEXT:    store <2 x float> [[TMP3]], ptr [[GEP_20]], align 4
51; POW2-ONLY-NEXT:    store float [[MULADD_0]], ptr [[GEP_28]], align 4
52; POW2-ONLY-NEXT:    ret void
53;
54entry:
; Four calls whose multiplier and addend are the same constant (1.0 to 4.0).
55  %muladd.0 = tail call float @llvm.fmuladd.f32(float %0, float 1.000000e+00, float 1.000000e+00)
56  %muladd.1 = tail call float @llvm.fmuladd.f32(float %0, float 2.000000e+00, float 2.000000e+00)
57  %muladd.2 = tail call float @llvm.fmuladd.f32(float %0, float 3.000000e+00, float 3.000000e+00)
58  %muladd.3 = tail call float @llvm.fmuladd.f32(float %0, float 4.000000e+00, float 4.000000e+00)
; Two identical calls that both multiply by the float loaded from %A + 28 and
; add 0.0.
59  %A.28 = getelementptr i8, ptr %A, i64 28
60  %l.A.28 = load float, ptr %A.28, align 4
61  %muladd.4 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00)
62  %muladd.5 = tail call float @llvm.fmuladd.f32(float %0, float %l.A.28, float 0.000000e+00)
; One call that multiplies by the float loaded from %A + 12 and adds the float
; loaded from %B + 0 (the slot just below the first store destination).
63  %A.12 = getelementptr i8, ptr %A, i64 12
64  %l.A.12  = load float, ptr %A.12, align 4
65  %gep.4  = getelementptr i8, ptr %B, i64 4
66  %gep.12 = getelementptr i8, ptr %B, i64 12
67  %l.B.0 = load float, ptr %B, align 4
68  %muladd.6  = tail call float @llvm.fmuladd.f32(float %0, float %l.A.12, float %l.B.0)
; Remaining destination addresses inside %B.
69  %gep.28 = getelementptr i8, ptr %B, i64 28
70  %gep.24 = getelementptr i8, ptr %B, i64 24
71  %gep.20 = getelementptr i8, ptr %B, i64 20
72  %gep.16 = getelementptr i8, ptr %B, i64 16
73  %gep.8 = getelementptr i8, ptr %B, i64 8
; Stores are issued in ascending address order: %muladd.6 lands at offset 4
; and %muladd.0 at offset 28, so the results appear in memory in reverse of
; their numbering. The scalar stores carry differing alignments (4, 8 and 16).
74  store float %muladd.6, ptr %gep.4, align 4
75  store float %muladd.5, ptr %gep.8, align 8
76  store float %muladd.4, ptr %gep.12, align 4
77  store float %muladd.3, ptr %gep.16, align 16
78  store float %muladd.2, ptr %gep.20, align 4
79  store float %muladd.1, ptr %gep.24, align 8
80  store float %muladd.0, ptr %gep.28, align 4
81  ret void
82}
83
; Declaration of the scalar float fmuladd intrinsic called by @test_insert_loads.
84declare float @llvm.fmuladd.f32(float, float, float)
85
; Attribute group #0, attached to @test_insert_loads: sets the "target-cpu"
; string attribute to skylake-avx512.
86attributes #0 = { "target-cpu"="skylake-avx512" }
87