xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=slp-vectorizer %s -slp-threshold=-10 | FileCheck %s
; The command above runs only the SLP vectorizer pass over this file and pipes
; the textual IR to FileCheck. -slp-threshold=-10 overrides the pass's cost
; threshold; NOTE(review): presumably this makes the pass accept trees it would
; otherwise judge unprofitable - confirm against the pass's option description.
;
; The two functions below are identical except for the operand order of the
; final scalar fadd, and the expected output for both is the same 2-wide
; vector sequence (only the operand order of the remaining scalar fadd differs).
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4target triple = "aarch64--linux-gnu"
5
; A struct wrapping a two-element float array; used as the source element type
; of the getelementptr that addresses the second float in both functions.
6%structA = type { [2 x float] }
7
; @test1 - baseline operand order.
;
; Input IR: a self-looping block converts %xmin and %ymin to float, subtracts
; the two consecutive floats loaded from %J (element 0 and element 1 of the
; array inside %structA), squares each difference, and adds the squares as
; "fadd %mul11, %mul12". The loop repeats while that sum compares ordered-equal
; to 0.0 and exits to for.end27 otherwise.
;
; Expected output (assertions below): the two scalar chains become one
; <2 x float> chain - insertelement of %xmin/%ymin in the entry block, then a
; vector sitofp, a vector load from %J, a vector fsub and a vector fmul. The
; two lanes are extracted and summed by a scalar fadd taking lane 0 first.
8define void @test1(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
9; CHECK-LABEL: @test1(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
12; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
13; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
14; CHECK:       for.body3.lr.ph:
15; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
16; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
17; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
18; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
19; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
20; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
21; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
22; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
23; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
24; CHECK:       for.end27:
25; CHECK-NEXT:    ret void
26;
27
28entry:
29  br label %for.body3.lr.ph
30
31for.body3.lr.ph:
; Lane 1 (%ymin) is converted before lane 0 (%xmin); the expected output still
; places %xmin in vector element 0 and %ymin in element 1.
32  %conv5 = sitofp i32 %ymin to float
33  %conv = sitofp i32 %xmin to float
34  %0 = load float, ptr %J, align 4
35  %sub = fsub fast float %conv, %0
; Address of the second float: field 0 of %structA, array index 1.
36  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
37  %1 = load float, ptr %arrayidx9, align 4
38  %sub10 = fsub fast float %conv5, %1
39  %mul11 = fmul fast float %sub, %sub
40  %mul12 = fmul fast float %sub10, %sub10
; Baseline order: lane-0 product first, lane-1 product second.
41  %add = fadd fast float %mul11, %mul12
42  %cmp = fcmp oeq float %add, 0.000000e+00
43  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
44
45for.end27:
46  ret void
47}
48
; @test2 - same computation as @test1 with the final fadd operands swapped.
;
; Input IR: identical to @test1 except that the sum of squares is written as
; "fadd %mul12, %mul11" (lane-1 product first).
;
; Expected output (assertions below): the same <2 x float> insertelement /
; sitofp / load / fsub / fmul sequence as in @test1. The only difference is
; that the remaining scalar fadd takes the lane-1 extract before the lane-0
; extract, mirroring the swapped operands in the input.
49define void @test2(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
50; CHECK-LABEL: @test2(
51; CHECK-NEXT:  entry:
52; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
53; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
54; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
55; CHECK:       for.body3.lr.ph:
56; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
57; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
58; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
59; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
60; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
61; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
62; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
63; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
64; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
65; CHECK:       for.end27:
66; CHECK-NEXT:    ret void
67;
68
69entry:
70  br label %for.body3.lr.ph
71
72for.body3.lr.ph:
73  %conv5 = sitofp i32 %ymin to float
74  %conv = sitofp i32 %xmin to float
75  %0 = load float, ptr %J, align 4
76  %sub = fsub fast float %conv, %0
; Address of the second float: field 0 of %structA, array index 1.
77  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
78  %1 = load float, ptr %arrayidx9, align 4
79  %sub10 = fsub fast float %conv5, %1
80  %mul11 = fmul fast float %sub, %sub
81  %mul12 = fmul fast float %sub10, %sub10
; The only input difference from @test1: lane-1 product first, lane-0 second.
82  %add = fadd fast float %mul12, %mul11         ;;;<---- Operands commuted!!
83  %cmp = fcmp oeq float %add, 0.000000e+00
84  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
85
86for.end27:
87  ret void
88}
89