; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

; SLP vectorizer tests for insertelement chains, run with an AArch64 triple.
; Origin: llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
; (revision f61c9b75690e8c374a99332b9b837bf8f77bfc90).

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; Two scalar llvm.fabs.f32 calls are inserted into lanes 0 and 1 of a fixed
; <2 x float>. The expected output replaces them with a single
; llvm.fabs.v2f32 call whose result is returned directly.
define <2 x float> @insertelement-fixed-vector() {
; CHECK-LABEL: @insertelement-fixed-vector(
; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
; CHECK-NEXT:    ret <2 x float> [[TMP1]]
;
  %f0 = tail call fast float @llvm.fabs.f32(float undef)
  %f1 = tail call fast float @llvm.fabs.f32(float undef)
  %i0 = insertelement <2 x float> undef, float %f0, i32 0
  %i1 = insertelement <2 x float> %i0, float %f1, i32 1
  ret <2 x float> %i1
}

; Same pattern as above, but the insertelement chain builds a scalable
; <vscale x 2 x float>. The expected output is the scalar code, unchanged.
; TODO: llvm.fabs could be optimized in vector form. It's legal to extract
; elements from fixed-length vector and insert into scalable vector.
define <vscale x 2 x float> @insertelement-scalable-vector() {
; CHECK-LABEL: @insertelement-scalable-vector(
; CHECK-NEXT:    [[F0:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
; CHECK-NEXT:    [[F1:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
; CHECK-NEXT:    [[I0:%.*]] = insertelement <vscale x 2 x float> undef, float [[F0]], i32 0
; CHECK-NEXT:    [[I1:%.*]] = insertelement <vscale x 2 x float> [[I0]], float [[F1]], i32 1
; CHECK-NEXT:    ret <vscale x 2 x float> [[I1]]
;
  %f0 = tail call fast float @llvm.fabs.f32(float undef)
  %f1 = tail call fast float @llvm.fabs.f32(float undef)
  %i0 = insertelement <vscale x 2 x float> undef, float %f0, i32 0
  %i1 = insertelement <vscale x 2 x float> %i0, float %f1, i32 1
  ret <vscale x 2 x float> %i1
}

; Function Attrs: nounwind readnone speculatable willreturn
declare float @llvm.fabs.f32(float)

; Both insertelement instructions write lane 0 of a zeroinitializer vector:
; the first inserts poison, the second overwrites it with a truncated
; constant. The truncated constants are also extended back to double and
; stored to elements 1 and 2 of %0. The expected output keeps the
; insertelement chain scalar (with the constant folded to 0.0) and merges the
; two adjacent stores into a single <2 x double> store.
define <4 x float> @insertelement_poison_lanes(ptr %0) {
; CHECK-LABEL: @insertelement_poison_lanes(
; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <4 x float> zeroinitializer, float poison, i64 0
; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float 0.000000e+00, i64 0
; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr double, ptr [[TMP0:%.*]], i64 1
; CHECK-NEXT:    store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[GEP_1]], align 8
; CHECK-NEXT:    ret <4 x float> [[INS_2]]
;
  %trunc.1 = fptrunc double 0.000000e+00 to float
  %trunc.2 = fptrunc double 1.000000e+00 to float
  %ins.1 = insertelement <4 x float> zeroinitializer, float poison, i64 0
  %ins.2 = insertelement <4 x float> %ins.1, float %trunc.1, i64 0
  %ext.1 = fpext float %trunc.1 to double
  %gep.1  = getelementptr double, ptr %0, i64 1
  store double %ext.1, ptr %gep.1, align 8
  %ext.2 = fpext float %trunc.2 to double
  %gep.2 = getelementptr double, ptr %0, i64 2
  store double %ext.2, ptr %gep.2, align 8
  ret <4 x float> %ins.2
}