; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-eabi -mcpu=krait | FileCheck %s

define arm_aapcs_vfpcc <4 x i16> @mla_args(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
; CHECK-LABEL: mla_args:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmull.u16 q8, d1, d0
; CHECK-NEXT:    vaddw.u16 q8, q8, d2
; CHECK-NEXT:    vmovn.i32 d0, q8
; CHECK-NEXT:    bx lr
entry:
  %v0 = sext <4 x i16> %vec0 to <4 x i32>
  %v1 = sext <4 x i16> %vec1 to <4 x i32>
  %v2 = sext <4 x i16> %vec2 to <4 x i32>
  %v3 = mul <4 x i32> %v1, %v0
  %v4 = add <4 x i32> %v3, %v2
  %v5 = trunc <4 x i32> %v4 to <4 x i16>
  ret <4 x i16> %v5
}
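; Illustrative scalar C equivalent of @mla_args above (a hedged sketch; the
; reference function and its names are hypothetical, not part of the test):
;   void mla_args_ref(int16_t r[4], const int16_t v0[4], const int16_t v1[4],
;                     const int16_t v2[4]) {
;     for (int i = 0; i < 4; i++)  /* widen, multiply-accumulate, narrow */
;       r[i] = (int16_t)((int32_t)v1[i] * (int32_t)v0[i] + (int32_t)v2[i]);
;   }
; Since the trunc keeps only the low 16 bits of each lane, the signedness of
; the widening does not affect the result, which is presumably why the CHECK
; lines can use vmull.u16/vaddw.u16 even though the IR widens with sext.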

define void @mla_loadstore(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: mla_loadstore:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, [r0, #16]
; CHECK-NEXT:    vldr d17, [r1, #16]
; CHECK-NEXT:    vldr d18, [r2, #16]
; CHECK-NEXT:    vmull.u16 q8, d17, d16
; CHECK-NEXT:    vaddw.u16 q8, q8, d18
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vstr d16, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %scevgep0 = getelementptr i16, ptr %a, i32 8
  %vec0 = load <4 x i16>, ptr %scevgep0, align 8
  %v0 = sext <4 x i16> %vec0 to <4 x i32>
  %scevgep1 = getelementptr i16, ptr %b, i32 8
  %vec1 = load <4 x i16>, ptr %scevgep1, align 8
  %v1 = sext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, ptr %c, i32 8
  %vec2 = load <4 x i16>, ptr %scevgep2, align 8
  %v2 = sext <4 x i16> %vec2 to <4 x i32>
  %v3 = mul <4 x i32> %v1, %v0
  %v4 = add <4 x i32> %v3, %v2
  %v5 = trunc <4 x i32> %v4 to <4 x i16>
  %scevgep3 = getelementptr i16, ptr %a, i32 8
  store <4 x i16> %v5, ptr %scevgep3, align 8
  ret void
}
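; @mla_loadstore above is the same widening multiply-accumulate as @mla_args,
; but with operands loaded from, and the result stored to, element offset 8
; (byte offset 16); the CHECK lines additionally verify that the <4 x i16>
; accesses fold into vldr/vstr with a [rN, #16] addressing mode.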

define arm_aapcs_vfpcc <4 x i16> @addmul_args(<4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2) {
; CHECK-LABEL: addmul_args:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmull.u16 q8, d1, d2
; CHECK-NEXT:    vmlal.u16 q8, d0, d2
; CHECK-NEXT:    vmovn.i32 d0, q8
; CHECK-NEXT:    bx lr
entry:
  %v0 = sext <4 x i16> %vec0 to <4 x i32>
  %v1 = sext <4 x i16> %vec1 to <4 x i32>
  %v2 = sext <4 x i16> %vec2 to <4 x i32>
  %v3 = add <4 x i32> %v1, %v0
  %v4 = mul <4 x i32> %v3, %v2
  %v5 = trunc <4 x i32> %v4 to <4 x i16>
  ret <4 x i16> %v5
}
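; In @addmul_args above the add feeds the mul: (%v1 + %v0) * %v2. As the
; CHECK lines show, the multiply is distributed over the add so the widening
; multiply instructions still apply: one vmull.u16 for the %v1 * %v2 product
; plus a vmlal.u16 accumulating %v0 * %v2, followed by a single vmovn.i32.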

define void @addmul_loadstore(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: addmul_loadstore:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, [r2, #16]
; CHECK-NEXT:    vldr d17, [r1, #16]
; CHECK-NEXT:    vmull.u16 q9, d17, d16
; CHECK-NEXT:    vldr d17, [r0, #16]
; CHECK-NEXT:    vmlal.u16 q9, d17, d16
; CHECK-NEXT:    vmovn.i32 d16, q9
; CHECK-NEXT:    vstr d16, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
  %scevgep0 = getelementptr i16, ptr %a, i32 8
  %vec0 = load <4 x i16>, ptr %scevgep0, align 8
  %v0 = sext <4 x i16> %vec0 to <4 x i32>
  %scevgep1 = getelementptr i16, ptr %b, i32 8
  %vec1 = load <4 x i16>, ptr %scevgep1, align 8
  %v1 = sext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, ptr %c, i32 8
  %vec2 = load <4 x i16>, ptr %scevgep2, align 8
  %v2 = sext <4 x i16> %vec2 to <4 x i32>
  %v3 = add <4 x i32> %v1, %v0
  %v4 = mul <4 x i32> %v3, %v2
  %v5 = trunc <4 x i32> %v4 to <4 x i16>
  %scevgep3 = getelementptr i16, ptr %a, i32 8
  store <4 x i16> %v5, ptr %scevgep3, align 8
  ret void
}
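; @addmul_loadstore above is the memory form of @addmul_args: the same
; vmull.u16/vmlal.u16 expansion, with the three operand vectors loaded via
; vldr and the narrowed result stored back through the first pointer.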

define void @func1(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: func1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    add r3, r1, #16
; CHECK-NEXT:    vldr d18, [r2, #16]
; CHECK-NEXT:    vld1.16 {d16}, [r3:64]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vaddw.u16 q10, q8, d18
; CHECK-NEXT:    vmovn.i32 d19, q10
; CHECK-NEXT:    vldr d20, [r0, #16]
; CHECK-NEXT:    vstr d19, [r0, #16]
; CHECK-NEXT:    vldr d19, [r2, #16]
; CHECK-NEXT:    vmull.s16 q11, d18, d19
; CHECK-NEXT:    vmovl.s16 q9, d19
; CHECK-NEXT:    vmla.i32 q11, q8, q9
; CHECK-NEXT:    vmovn.i32 d16, q11
; CHECK-NEXT:    vstr d16, [r1, #16]
; CHECK-NEXT:    vldr d16, [r2, #16]
; CHECK-NEXT:    vmlal.u16 q11, d16, d20
; CHECK-NEXT:    vmovn.i32 d16, q11
; CHECK-NEXT:    vstr d16, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
; This test case tries to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i];
; Check that the vector load of a[i] for "a[i] = b[i] + a[i] * c[i]" is
; scheduled before the first vector store to "a[i] = b[i] + c[i]".
; Check that no vector load of a[i] is scheduled between the vector stores
; to a[i]; otherwise the load of a[i] would be polluted by the first vector
; store to a[i].
; This test case checks that the chain information is updated during
; lowerMUL for the newly created Load SDNode.
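;
; A scalar C sketch of the loop body being modeled (hypothetical reference
; code, not part of the original test):
;   for (int i = 0; i < n; i++) {
;     a[i] = b[i] + c[i];
;     b[i] = a[i] * c[i];
;     a[i] = b[i] + a[i] * c[i];
;   }
; In the IR below, the a[i] operand of the final multiply is %vec0, which is
; loaded once at entry, before the first store to a.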

  %scevgep0 = getelementptr i16, ptr %a, i32 8
  %vec0 = load <4 x i16>, ptr %scevgep0, align 8
  %scevgep1 = getelementptr i16, ptr %b, i32 8
  %vec1 = load <4 x i16>, ptr %scevgep1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, ptr %c, i32 8
  %vec2 = load <4 x i16>, ptr %scevgep2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, ptr %a, i32 8
  store <4 x i16> %2, ptr %scevgep3, align 8
  %vec4 = load <4 x i16>, ptr %scevgep2, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  store <4 x i16> %4, ptr %scevgep1, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vec6 = load <4 x i16>, ptr %scevgep2, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %7 = trunc <4 x i32> %vec8 to <4 x i16>
  store <4 x i16> %7, ptr %scevgep3, align 8
  ret void
}

define void @func2(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: func2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, [r1, #16]
; CHECK-NEXT:    add r3, r0, #16
; CHECK-NEXT:    vldr d17, [r2, #16]
; CHECK-NEXT:    vaddl.u16 q9, d17, d16
; CHECK-NEXT:    vmovn.i32 d18, q9
; CHECK-NEXT:    vld1.16 {d19}, [r3:64]
; CHECK-NEXT:    vstr d18, [r0, #16]
; CHECK-NEXT:    vldr d18, [r2, #16]
; CHECK-NEXT:    vmull.s16 q10, d17, d18
; CHECK-NEXT:    vmovl.s16 q11, d18
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vmovl.s16 q9, d19
; CHECK-NEXT:    vmla.i32 q10, q8, q11
; CHECK-NEXT:    vmovn.i32 d16, q10
; CHECK-NEXT:    vstr d16, [r1, #16]
; CHECK-NEXT:    add r1, r2, #16
; CHECK-NEXT:    vld1.16 {d16}, [r1:64]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vmla.i32 q10, q8, q9
; CHECK-NEXT:    vadd.i32 q8, q10, q9
; CHECK-NEXT:    vmovn.i32 d16, q8
; CHECK-NEXT:    vstr d16, [r0, #16]
; CHECK-NEXT:    bx lr
entry:
; This test case tries to vectorize the pseudo code below.
; a[i] = b[i] + c[i];
; b[i] = a[i] * c[i];
; a[i] = b[i] + a[i] * c[i] + a[i];
; Check that the vector load of a[i] for "a[i] = b[i] + a[i] * c[i] + a[i]"
; is scheduled before the first vector store to "a[i] = b[i] + c[i]".
; Check that no vector load of a[i] is scheduled between the first vector
; store to a[i] and the vector add of a[i]; otherwise the load of a[i]
; would be polluted by the first vector store to a[i].
; This test case checks that both the chain and the value of the newly
; created Load SDNode are updated during lowerMUL.
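;
; A scalar C sketch of the loop body being modeled (hypothetical reference
; code, not part of the original test):
;   for (int i = 0; i < n; i++) {
;     a[i] = b[i] + c[i];
;     b[i] = a[i] * c[i];
;     a[i] = b[i] + a[i] * c[i] + a[i];
;   }
; As in @func1, the a[i] operands of the last statement come from %vec0,
; loaded once at entry before the first store to a; the extra "+ a[i]" term
; reuses that early load's value again after the stores.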

  %scevgep0 = getelementptr i16, ptr %a, i32 8
  %vec0 = load <4 x i16>, ptr %scevgep0, align 8
  %scevgep1 = getelementptr i16, ptr %b, i32 8
  %vec1 = load <4 x i16>, ptr %scevgep1, align 8
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, ptr %c, i32 8
  %vec2 = load <4 x i16>, ptr %scevgep2, align 8
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, ptr %a, i32 8
  store <4 x i16> %2, ptr %scevgep3, align 8
  %vec4 = load <4 x i16>, ptr %scevgep2, align 8
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  store <4 x i16> %4, ptr %scevgep1, align 8
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vec6 = load <4 x i16>, ptr %scevgep2, align 8
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %vec9 = add <4 x i32> %vec8, %5
  %7 = trunc <4 x i32> %vec9 to <4 x i16>
  store <4 x i16> %7, ptr %scevgep3, align 8
  ret void
}