xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll (revision f3d2609af3031ddb54030548e86335f295cf49ca)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake-avx512 -pass-remarks-output=%t | FileCheck %s
3; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
4
5; YAML-LABEL: --- !Passed
6; YAML-NEXT:  Pass:            slp-vectorizer
7; YAML-NEXT:  Name:            VectorizedList
8; YAML-NEXT:  Function:        test
9; YAML-NEXT:  Args:
10; YAML-NEXT:    - String:          'SLP vectorized with cost '
11; YAML-NEXT:    - Cost:            '-4'
12; YAML-NEXT:    - String:          ' and with tree size '
13; YAML-NEXT:    - TreeSize:        '5'
14
; Builds a <4 x float> from four scalar fadds of %a: lanes 0-1 add %v, lanes
; 2-3 add two adjacent floats loaded from %x. The autogenerated assertions
; below expect a single 4-wide fadd whose second operand holds %v in lanes 0-1
; and one <2 x float> load of %x inserted at lanes 2-3.
15define <4 x float> @test(ptr %x, float %v, float %a) {
16; CHECK-LABEL: define <4 x float> @test(
17; CHECK-SAME: ptr [[X:%.*]], float [[V:%.*]], float [[A:%.*]]) #[[ATTR0:[0-9]+]] {
18; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[X]], align 4
19; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[A]], i32 0
20; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer
21; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0
22; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
23; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP1]], i64 2)
24; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]]
25; CHECK-NEXT:    ret <4 x float> [[TMP8]]
26;
27  %gep1 = getelementptr inbounds <4 x float>, ptr %x, i64 0, i64 1
28  %x0 = load float, ptr %x, align 4
29  %x1 = load float, ptr %gep1, align 4
30  %add1 = fadd float %a, %v
31  %add2 = fadd float %a, %v
32  %add3 = fadd float %a, %x0
33  %add4 = fadd float %a, %x1
; All four lanes (0-3) are overwritten by the insertelement chain below, so the
; base vector is never observed; start from poison rather than the deprecated
; undef.
34  %i0 = insertelement <4 x float> poison, float %add1, i32 0
35  %i1 = insertelement <4 x float> %i0, float %add2, i32 1
36  %i2 = insertelement <4 x float> %i1, float %add3, i32 2
37  %i3 = insertelement <4 x float> %i2, float %add4, i32 3
38  ret <4 x float> %i3
39}
40
40