; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S %s | FileCheck %s

; Scalar callee used by @memread_4x. It is declared readonly/nounwind/willreturn
; and carries attribute group #0, whose "vector-function-abi-variant" string
; names @vmemread as its vector variant.
declare float @memread(float) readonly nounwind willreturn #0
; Vector counterpart named in attribute group #0; takes and returns <4 x float>.
declare <4 x float> @vmemread(<4 x float>)

; Loads a <4 x float> from %a, calls the scalar @memread on each of the four
; lanes (every call site tagged with #0), and reassembles the results with
; insertelement. The autogenerated assertions expect the four scalar calls to
; be replaced by a single `call fast <4 x float> @vmemread` on the loaded
; vector, with the extract/insert chain gone.
define <4 x float> @memread_4x(ptr %a) {
; CHECK-LABEL: @memread_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @memread(float %vecext) #0
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @memread(float %vecext.1) #0
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @memread(float %vecext.2) #0
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @memread(float %vecext.3) #0
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Scalar callee used by @memwrite_4x. Unlike @memread it is NOT declared
; readonly (only nounwind/willreturn); attribute group #1 still names
; @vmemwrite as its vector variant.
declare float @memwrite(float) nounwind willreturn #1
; Vector counterpart named in attribute group #1; takes and returns <4 x float>.
declare <4 x float> @vmemwrite(<4 x float>)

; Same shape as @memread_4x, but the per-lane callee is @memwrite (call sites
; tagged with #1). The autogenerated assertions expect the output to keep all
; four scalar `tail call fast float @memwrite` calls and the extract/insert
; chain, i.e. no call to @vmemwrite is formed.
; NOTE(review): presumably this is because @memwrite is not declared readonly,
; the only visible difference from @memread -- confirm against the SLP
; vectorizer's call-vectorization legality rules.
define <4 x float> @memwrite_4x(ptr %a) {
; CHECK-LABEL: @memwrite_4x(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memwrite(float [[VECEXT]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @memwrite(float [[VECEXT_1]]) #[[ATTR2]]
; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @memwrite(float [[VECEXT_2]]) #[[ATTR2]]
; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @memwrite(float [[VECEXT_3]]) #[[ATTR2]]
; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
;
entry:
  %0 = load <4 x float>, ptr %a, align 16
  %vecext = extractelement <4 x float> %0, i32 0
  %1 = tail call fast float @memwrite(float %vecext) #1
  %vecins = insertelement <4 x float> undef, float %1, i32 0
  %vecext.1 = extractelement <4 x float> %0, i32 1
  %2 = tail call fast float @memwrite(float %vecext.1) #1
  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
  %vecext.2 = extractelement <4 x float> %0, i32 2
  %3 = tail call fast float @memwrite(float %vecext.2) #1
  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
  %vecext.3 = extractelement <4 x float> %0, i32 3
  %4 = tail call fast float @memwrite(float %vecext.3) #1
  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
  ret <4 x float> %vecins.3
}

; Attribute groups that tie each scalar function to its vector variant. The
; string is a vector-function-ABI mangled name followed by the redirected
; vector function name in parentheses.
; NOTE(review): `_ZGV_LLVM_N4v_` presumably decodes as ISA `_LLVM_`, `N` =
; unmasked, `4` = four lanes, `v` = one vector parameter -- confirm against the
; LangRef description of "vector-function-abi-variant".
; #0: @memread -> @vmemread
attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_N4v_memread(vmemread)" }
; #1: @memwrite -> @vmemwrite
attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_N4v_memwrite(vmemwrite)" }