xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/mul.ll (revision 580210a0c938531ef9fd79f9ffedb93eeb2e66c2)
1a2016dc8SSjoerd Meijer; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
23be72f40SBjorn Pettersson; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s
3a2016dc8SSjoerd Meijer
4a2016dc8SSjoerd Meijertarget datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
5a2016dc8SSjoerd Meijertarget triple = "aarch64--linux-gnu"
6a2016dc8SSjoerd Meijer
7a2016dc8SSjoerd Meijer; These examples correspond to input code like:
8a2016dc8SSjoerd Meijer;
9a2016dc8SSjoerd Meijer;   void t(long * __restrict a, long * __restrict b) {
10*580210a0SNikita Popov;     a[0] *= b[0];
11*580210a0SNikita Popov;     a[1] *= b[1];
12a2016dc8SSjoerd Meijer;   }
13a2016dc8SSjoerd Meijer;
14a2016dc8SSjoerd Meijer; If we SLP vectorise this then we end up with something like this because we
15a2016dc8SSjoerd Meijer; don't have a mul.2d:
16a2016dc8SSjoerd Meijer;
17a2016dc8SSjoerd Meijer;        ldr     q0, [x1]
18a2016dc8SSjoerd Meijer;        ldr     q1, [x0]
19a2016dc8SSjoerd Meijer;        fmov    x8, d0
20a2016dc8SSjoerd Meijer;        mov     x10, v0.d[1]
21a2016dc8SSjoerd Meijer;        fmov    x9, d1
22a2016dc8SSjoerd Meijer;        mov     x11, v1.d[1]
23a2016dc8SSjoerd Meijer;        mul     x8, x9, x8
24a2016dc8SSjoerd Meijer;        mul     x9, x11, x10
25a2016dc8SSjoerd Meijer;        fmov    d0, x8
26a2016dc8SSjoerd Meijer;        mov     v0.d[1], x9
27a2016dc8SSjoerd Meijer;        str     q0, [x0]
28a2016dc8SSjoerd Meijer;        ret
29a2016dc8SSjoerd Meijer;
305110ff08SSjoerd Meijer; If we don't SLP vectorise but scalarize this we get this instead:
31a2016dc8SSjoerd Meijer;
32a2016dc8SSjoerd Meijer;        ldp     x8, x9, [x1]
33a2016dc8SSjoerd Meijer;        ldp     x10, x11, [x0]
34a2016dc8SSjoerd Meijer;        mul     x9, x11, x9
35a2016dc8SSjoerd Meijer;        mul     x8, x10, x8
36a2016dc8SSjoerd Meijer;        stp     x8, x9, [x0]
37a2016dc8SSjoerd Meijer;        ret
38a2016dc8SSjoerd Meijer;
39*580210a0SNikita Popovdefine void @mul(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
40a2016dc8SSjoerd Meijer; CHECK-LABEL: @mul(
41a2016dc8SSjoerd Meijer; CHECK-NEXT:  entry:
42*580210a0SNikita Popov; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
43*580210a0SNikita Popov; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
445110ff08SSjoerd Meijer; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
45*580210a0SNikita Popov; CHECK-NEXT:    store i64 [[MUL]], ptr [[A]], align 8
46*580210a0SNikita Popov; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
47*580210a0SNikita Popov; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
48*580210a0SNikita Popov; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
49*580210a0SNikita Popov; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
505110ff08SSjoerd Meijer; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
51*580210a0SNikita Popov; CHECK-NEXT:    store i64 [[MUL4]], ptr [[ARRAYIDX3]], align 8
52a2016dc8SSjoerd Meijer; CHECK-NEXT:    ret void
53a2016dc8SSjoerd Meijer;
; The expected output above is the input unchanged: the SLP vectorizer's cost
; model should decline to form a <2 x i64> multiply here, since AArch64 has no
; 64-bit vector multiply (no mul.2d — see the header comment).
54a2016dc8SSjoerd Meijerentry:
; Element 0: a[0] = a[0] * b[0], done as scalar load/load/mul/store.
55*580210a0SNikita Popov  %0 = load i64, ptr %b, align 8
56*580210a0SNikita Popov  %1 = load i64, ptr %a, align 8
57a2016dc8SSjoerd Meijer  %mul = mul nsw i64 %1, %0
58*580210a0SNikita Popov  store i64 %mul, ptr %a, align 8
; Element 1: a[1] = a[1] * b[1], same pattern at offset 1.
59*580210a0SNikita Popov  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
60*580210a0SNikita Popov  %2 = load i64, ptr %arrayidx2, align 8
61*580210a0SNikita Popov  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
62*580210a0SNikita Popov  %3 = load i64, ptr %arrayidx3, align 8
63a2016dc8SSjoerd Meijer  %mul4 = mul nsw i64 %3, %2
64*580210a0SNikita Popov  store i64 %mul4, ptr %arrayidx3, align 8
65a2016dc8SSjoerd Meijer  ret void
66a2016dc8SSjoerd Meijer}
67a2016dc8SSjoerd Meijer
68a2016dc8SSjoerd Meijer; Similar example, but now a multiply-accumulate:
69a2016dc8SSjoerd Meijer;
70a2016dc8SSjoerd Meijer;  void x (long * __restrict a, long * __restrict b) {
71*580210a0SNikita Popov;    a[0] *= b[0];
72*580210a0SNikita Popov;    a[1] *= b[1];
73a2016dc8SSjoerd Meijer;    a[0] += b[0];
74a2016dc8SSjoerd Meijer;    a[1] += b[1];
75a2016dc8SSjoerd Meijer;  }
76a2016dc8SSjoerd Meijer;
77*580210a0SNikita Popovdefine void @mac(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
78a2016dc8SSjoerd Meijer; CHECK-LABEL: @mac(
79a2016dc8SSjoerd Meijer; CHECK-NEXT:  entry:
80*580210a0SNikita Popov; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
81*580210a0SNikita Popov; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
825110ff08SSjoerd Meijer; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
83*580210a0SNikita Popov; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
84*580210a0SNikita Popov; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
85*580210a0SNikita Popov; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
86*580210a0SNikita Popov; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
875110ff08SSjoerd Meijer; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
885110ff08SSjoerd Meijer; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
89*580210a0SNikita Popov; CHECK-NEXT:    store i64 [[ADD]], ptr [[A]], align 8
905110ff08SSjoerd Meijer; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
91*580210a0SNikita Popov; CHECK-NEXT:    store i64 [[ADD9]], ptr [[ARRAYIDX3]], align 8
92a2016dc8SSjoerd Meijer; CHECK-NEXT:    ret void
93a2016dc8SSjoerd Meijer;
; As in @mul above, the expected output is the unvectorized input: the i64
; multiplies should stay scalar on AArch64, so the whole multiply-accumulate
; chain remains scalar as well.
94a2016dc8SSjoerd Meijerentry:
; Per-element multiplies: %mul = a[0]*b[0], %mul4 = a[1]*b[1].
95*580210a0SNikita Popov  %0 = load i64, ptr %b, align 8
96*580210a0SNikita Popov  %1 = load i64, ptr %a, align 8
97a2016dc8SSjoerd Meijer  %mul = mul nsw i64 %1, %0
98*580210a0SNikita Popov  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
99*580210a0SNikita Popov  %2 = load i64, ptr %arrayidx2, align 8
100*580210a0SNikita Popov  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
101*580210a0SNikita Popov  %3 = load i64, ptr %arrayidx3, align 8
102a2016dc8SSjoerd Meijer  %mul4 = mul nsw i64 %3, %2
; Accumulate: a[i] = mul_i + b[i], reusing the b-loads %0 and %2 from above.
103a2016dc8SSjoerd Meijer  %add = add nsw i64 %mul, %0
104*580210a0SNikita Popov  store i64 %add, ptr %a, align 8
105a2016dc8SSjoerd Meijer  %add9 = add nsw i64 %mul4, %2
106*580210a0SNikita Popov  store i64 %add9, ptr %arrayidx3, align 8
107a2016dc8SSjoerd Meijer  ret void
108a2016dc8SSjoerd Meijer}
109