xref: /llvm-project/llvm/test/CodeGen/AArch64/avg.ll (revision 28d071803387b6b5c2ba6bc8321aa8e35168167a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
3
4define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
5; CHECK-LABEL: zext_avgflooru:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
8; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
9; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
10; CHECK-NEXT:    ret
; Unsigned floor-average written as (x & y) + ((x ^ y) >> 1) on zero-extended
; i8 operands: per CHECK it narrows to a single v16i8 UHADD, then widens.
11  %x0 = zext <16 x i8> %a0 to <16 x i16>
12  %x1 = zext <16 x i8> %a1 to <16 x i16>
13  %and = and <16 x i16> %x0, %x1
14  %xor = xor <16 x i16> %x0, %x1
15  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
16  %avg = add <16 x i16> %and, %shift
17  ret <16 x i16> %avg
18}
19
20define <16 x i16> @zext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
21; CHECK-LABEL: zext_avgflooru_mismatch:
22; CHECK:       // %bb.0:
23; CHECK-NEXT:    movi v2.16b, #15
24; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
25; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
26; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
27; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
28; CHECK-NEXT:    ret
; Mismatched zext source widths (i8 vs i4): per CHECK the i4 operand is
; masked to its low 4 bits (movi #15 / and) and the UHADD fold still fires.
29  %x0 = zext <16 x i8> %a0 to <16 x i16>
30  %x1 = zext <16 x i4> %a1 to <16 x i16>
31  %and = and <16 x i16> %x0, %x1
32  %xor = xor <16 x i16> %x0, %x1
33  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
34  %avg = add <16 x i16> %and, %shift
35  ret <16 x i16> %avg
36}
37
38define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
39; CHECK-LABEL: zext_avgceilu:
40; CHECK:       // %bb.0:
41; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
42; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
43; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
44; CHECK-NEXT:    ret
; Unsigned ceiling-average written as (x | y) - ((x ^ y) >> 1) on zero-extended
; i8 operands: per CHECK it narrows to a single v16i8 URHADD (rounding halving add).
45  %x0 = zext <16 x i8> %a0 to <16 x i16>
46  %x1 = zext <16 x i8> %a1 to <16 x i16>
47  %or = or <16 x i16> %x0, %x1
48  %xor = xor <16 x i16> %x0, %x1
49  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
50  %avg = sub <16 x i16> %or, %shift
51  ret <16 x i16> %avg
52}
53
54define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
55; CHECK-LABEL: zext_avgceilu_mismatch:
56; CHECK:       // %bb.0:
57; CHECK-NEXT:    movi v2.16b, #15
58; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
59; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
60; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
61; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
62; CHECK-NEXT:    ret
; Mismatched zext source widths (i4 vs i8), ceiling form: per CHECK the i4
; operand is masked to 4 bits and the URHADD fold still fires at v16i8.
63  %x0 = zext <16 x i4> %a0 to <16 x i16>
64  %x1 = zext <16 x i8> %a1 to <16 x i16>
65  %or = or <16 x i16> %x0, %x1
66  %xor = xor <16 x i16> %x0, %x1
67  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
68  %avg = sub <16 x i16> %or, %shift
69  ret <16 x i16> %avg
70}
71
72define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) {
73; CHECK-LABEL: sext_avgfloors:
74; CHECK:       // %bb.0:
75; CHECK-NEXT:    shadd v0.16b, v0.16b, v1.16b
76; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
77; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
78; CHECK-NEXT:    ret
; Signed floor-average: same (x & y) + ((x ^ y) >> 1) shape but with sext and
; an arithmetic shift — per CHECK it narrows to a single v16i8 SHADD.
79  %x0 = sext <16 x i8> %a0 to <16 x i16>
80  %x1 = sext <16 x i8> %a1 to <16 x i16>
81  %and = and <16 x i16> %x0, %x1
82  %xor = xor <16 x i16> %x0, %x1
83  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
84  %avg = add <16 x i16> %and, %shift
85  ret <16 x i16> %avg
86}
87
88define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
89; CHECK-LABEL: sext_avgfloors_mismatch:
90; CHECK:       // %bb.0:
91; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
92; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
93; CHECK-NEXT:    sshll v3.8h, v0.8b, #0
94; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
95; CHECK-NEXT:    shl v1.8h, v1.8h, #12
96; CHECK-NEXT:    shl v2.8h, v2.8h, #12
97; CHECK-NEXT:    sshr v4.8h, v1.8h, #12
98; CHECK-NEXT:    sshr v1.8h, v2.8h, #12
99; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
100; CHECK-NEXT:    shadd v0.8h, v3.8h, v4.8h
101; CHECK-NEXT:    ret
; Mismatched sext source widths (i8 vs i4): per CHECK the narrow v16i8 fold is
; blocked; the i4 operand is sign-extended in-register (shl/sshr by 12) and the
; SHADD average is formed at the wide v8i16 type instead.
102  %x0 = sext <16 x i8> %a0 to <16 x i16>
103  %x1 = sext <16 x i4> %a1 to <16 x i16>
104  %and = and <16 x i16> %x0, %x1
105  %xor = xor <16 x i16> %x0, %x1
106  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
107  %avg = add <16 x i16> %and, %shift
108  ret <16 x i16> %avg
109}
110
111define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) {
112; CHECK-LABEL: sext_avgceils:
113; CHECK:       // %bb.0:
114; CHECK-NEXT:    srhadd v0.16b, v0.16b, v1.16b
115; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
116; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
117; CHECK-NEXT:    ret
; Signed ceiling-average written as (x | y) - ((x ^ y) >>s 1) on sign-extended
; i8 operands: per CHECK it narrows to a single v16i8 SRHADD.
118  %x0 = sext <16 x i8> %a0 to <16 x i16>
119  %x1 = sext <16 x i8> %a1 to <16 x i16>
120  %or = or <16 x i16> %x0, %x1
121  %xor = xor <16 x i16> %x0, %x1
122  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
123  %avg = sub <16 x i16> %or, %shift
124  ret <16 x i16> %avg
125}
126
127define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
128; CHECK-LABEL: sext_avgceils_mismatch:
129; CHECK:       // %bb.0:
130; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
131; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
132; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
133; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
134; CHECK-NEXT:    shl v2.8h, v2.8h, #12
135; CHECK-NEXT:    shl v0.8h, v0.8h, #12
136; CHECK-NEXT:    sshr v2.8h, v2.8h, #12
137; CHECK-NEXT:    sshr v0.8h, v0.8h, #12
138; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
139; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
140; CHECK-NEXT:    ret
; Mismatched sext source widths (i4 vs i8), ceiling form: per CHECK the i4
; operand is sign-extended in-register (shl/sshr by 12) and SRHADD is formed
; at the wide v8i16 type rather than the narrow v16i8 one.
141  %x0 = sext <16 x i4> %a0 to <16 x i16>
142  %x1 = sext <16 x i8> %a1 to <16 x i16>
143  %or = or <16 x i16> %x0, %x1
144  %xor = xor <16 x i16> %x0, %x1
145  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
146  %avg = sub <16 x i16> %or, %shift
147  ret <16 x i16> %avg
148}
149
150define <8 x i16> @add_avgflooru(<8 x i16> %a0, <8 x i16> %a1) {
151; CHECK-LABEL: add_avgflooru:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
154; CHECK-NEXT:    ret
; Floor-average in the (a + b) >>u 1 form: nuw proves the add cannot wrap,
; so per CHECK it folds to a single UHADD.
155  %add = add nuw <8 x i16> %a0, %a1
156  %avg = lshr <8 x i16> %add, splat(i16 1)
157  ret <8 x i16> %avg
158}
159
160define <8 x i16> @add_avgflooru_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
161; CHECK-LABEL: add_avgflooru_mismatch:
162; CHECK:       // %bb.0:
163; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
164; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
165; CHECK-NEXT:    ret
; Negative test: without nuw the add may wrap, so the UHADD fold must not
; fire — per CHECK a plain add + ushr is kept.
166  %add = add <8 x i16> %a0, %a1
167  %avg = lshr <8 x i16> %add, splat(i16 1)
168  ret <8 x i16> %avg
169}
170
171define <8 x i16> @add_avgceilu(<8 x i16> %a0, <8 x i16> %a1) {
172; CHECK-LABEL: add_avgceilu:
173; CHECK:       // %bb.0:
174; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
175; CHECK-NEXT:    ret
; Ceiling-average in the (a0 + 1 + a1) >>u 1 form with nuw on both adds:
; per CHECK it folds to a single URHADD.
176  %add0 = add nuw <8 x i16> %a0, splat(i16 1)
177  %add = add nuw <8 x i16> %a1, %add0
178  %avg = lshr <8 x i16> %add, splat(i16 1)
179  ret <8 x i16> %avg
180}
181
182define <8 x i16> @add_avgceilu2(<8 x i16> %a0, <8 x i16> %a1) {
183; CHECK-LABEL: add_avgceilu2:
184; CHECK:       // %bb.0:
185; CHECK-NEXT:    urhadd v0.8h, v1.8h, v0.8h
186; CHECK-NEXT:    ret
; Same ceiling-average as add_avgceilu but with the +1 applied after the sum
; ((a1 + a0) + 1) — per CHECK it still folds to URHADD (operands commuted).
187  %add0 = add nuw <8 x i16> %a1, %a0
188  %add = add nuw <8 x i16> %add0, splat(i16 1)
189  %avg = lshr <8 x i16> %add, splat(i16 1)
190  ret <8 x i16> %avg
191}
192
193define <8 x i16> @add_avgceilu_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
194; CHECK-LABEL: add_avgceilu_mismatch1:
195; CHECK:       // %bb.0:
196; CHECK-NEXT:    movi v2.8h, #1
197; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
198; CHECK-NEXT:    uhadd v0.8h, v0.8h, v2.8h
199; CHECK-NEXT:    ret
; Negative test: the inner a1+a0 lacks nuw, so the full URHADD fold is
; illegal. Per CHECK only the outer nuw (+1 then >>1) folds, as a UHADD
; against a splat-1 vector.
200  %add0 = add <8 x i16> %a1, %a0
201  %add = add nuw <8 x i16> %add0, splat(i16 1)
202  %avg = lshr <8 x i16> %add, splat(i16 1)
203  ret <8 x i16> %avg
204}
205
206define <8 x i16> @add_avgceilu_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
207; CHECK-LABEL: add_avgceilu_mismatch2:
208; CHECK:       // %bb.0:
209; CHECK-NEXT:    mvn v1.16b, v1.16b
210; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
211; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
212; CHECK-NEXT:    ret
; Negative test: the outer +1 lacks nuw, so no URHADD is formed — per CHECK
; the expression lowers as mvn/sub (i.e. a0 - ~a1 == a0 + a1 + 1) plus ushr.
213  %add0 = add nuw <8 x i16> %a1, %a0
214  %add = add <8 x i16> %add0, splat(i16 1)
215  %avg = lshr <8 x i16> %add, splat(i16 1)
216  ret <8 x i16> %avg
217}
218
219define <8 x i16> @add_avgceilu_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
220; CHECK-LABEL: add_avgceilu_mismatch3:
221; CHECK:       // %bb.0:
222; CHECK-NEXT:    mvn v1.16b, v1.16b
223; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
224; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
225; CHECK-NEXT:    ret
; NOTE(review): IR and checks appear byte-identical to add_avgceilu_mismatch2
; above — possibly an intended-but-unmade variation; confirm against upstream.
226  %add0 = add nuw <8 x i16> %a1, %a0
227  %add = add <8 x i16> %add0, splat(i16 1)
228  %avg = lshr <8 x i16> %add, splat(i16 1)
229  ret <8 x i16> %avg
230}
231
232define <8 x i16> @add_avgfloors(<8 x i16> %a0, <8 x i16> %a1) {
233; CHECK-LABEL: add_avgfloors:
234; CHECK:       // %bb.0:
235; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
236; CHECK-NEXT:    ret
; Signed floor-average in the (a + b) >>s 1 form: nsw proves no signed wrap,
; so per CHECK it folds to a single SHADD.
237  %add = add nsw <8 x i16> %a0, %a1
238  %avg = ashr <8 x i16> %add, splat(i16 1)
239  ret <8 x i16> %avg
240}
241
242define <8 x i16> @add_avgfloors_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
243; CHECK-LABEL: add_avgfloors_mismatch:
244; CHECK:       // %bb.0:
245; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
246; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
247; CHECK-NEXT:    ret
; Negative test: without nsw the SHADD fold must not fire — per CHECK a
; plain add + sshr is kept.
248  %add = add <8 x i16> %a0, %a1
249  %avg = ashr <8 x i16> %add, splat(i16 1)
250  ret <8 x i16> %avg
251}
252
253define <8 x i16> @add_avgfoor_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
254; CHECK-LABEL: add_avgfoor_mismatch2:
255; CHECK:       // %bb.0:
256; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
257; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
258; CHECK-NEXT:    ret
; Negative test (function name typo "avgfoor" preserved): the shift amount is
; 2, not 1, so this is not an average — per CHECK no SHADD is formed.
259  %add = add nsw <8 x i16> %a0, %a1
260  %avg = ashr <8 x i16> %add, splat(i16 2)
261  ret <8 x i16> %avg
262}
263
264define <8 x i16> @add_avgceils(<8 x i16> %a0, <8 x i16> %a1) {
265; CHECK-LABEL: add_avgceils:
266; CHECK:       // %bb.0:
267; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
268; CHECK-NEXT:    ret
; Signed ceiling-average in the (a0 + 1 + a1) >>s 1 form with nsw on both
; adds: per CHECK it folds to a single SRHADD.
269  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
270  %add = add nsw <8 x i16> %a1, %add0
271  %avg = ashr <8 x i16> %add, splat(i16 1)
272  ret <8 x i16> %avg
273}
274
275define <8 x i16> @add_avgceils2(<8 x i16> %a0, <8 x i16> %a1) {
276; CHECK-LABEL: add_avgceils2:
277; CHECK:       // %bb.0:
278; CHECK-NEXT:    srhadd v0.8h, v1.8h, v0.8h
279; CHECK-NEXT:    ret
; Same signed ceiling-average but with the +1 applied after the sum
; ((a1 + a0) + 1) — per CHECK it still folds to SRHADD (operands commuted).
280  %add0 = add nsw <8 x i16> %a1, %a0
281  %add = add nsw <8 x i16> %add0, splat(i16 1)
282  %avg = ashr <8 x i16> %add, splat(i16 1)
283  ret <8 x i16> %avg
284}
285
286define <8 x i16> @add_avgceils_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
287; CHECK-LABEL: add_avgceils_mismatch1:
288; CHECK:       // %bb.0:
289; CHECK-NEXT:    movi v2.8h, #1
290; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
291; CHECK-NEXT:    shadd v0.8h, v0.8h, v2.8h
292; CHECK-NEXT:    ret
; Negative test: the inner a1+a0 lacks nsw, so the full SRHADD fold is
; illegal. Per CHECK only the outer nsw (+1 then >>s 1) folds, as a SHADD
; against a splat-1 vector.
293  %add0 = add <8 x i16> %a1, %a0
294  %add = add nsw <8 x i16> %add0, splat(i16 1)
295  %avg = ashr <8 x i16> %add, splat(i16 1)
296  ret <8 x i16> %avg
297}
298
299define <8 x i16> @add_avgceils_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
300; CHECK-LABEL: add_avgceils_mismatch2:
301; CHECK:       // %bb.0:
302; CHECK-NEXT:    mvn v1.16b, v1.16b
303; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
304; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
305; CHECK-NEXT:    ret
; Negative test: the outer +1 lacks nsw, so no SRHADD is formed — per CHECK
; the expression lowers as mvn/sub (a0 - ~a1 == a0 + a1 + 1) plus sshr.
306  %add0 = add nsw <8 x i16> %a1, %a0
307  %add = add <8 x i16> %add0, splat(i16 1)
308  %avg = ashr <8 x i16> %add, splat(i16 1)
309  ret <8 x i16> %avg
310}
311
312define <8 x i16> @add_avgceils_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
313; CHECK-LABEL: add_avgceils_mismatch3:
314; CHECK:       // %bb.0:
315; CHECK-NEXT:    mvn v1.16b, v1.16b
316; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
317; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
318; CHECK-NEXT:    ret
; NOTE(review): IR and checks appear byte-identical to add_avgceils_mismatch2
; above — possibly an intended-but-unmade variation; confirm against upstream.
319  %add0 = add nsw <8 x i16> %a1, %a0
320  %add = add <8 x i16> %add0, splat(i16 1)
321  %avg = ashr <8 x i16> %add, splat(i16 1)
322  ret <8 x i16> %avg
323}
324
325define <8 x i16> @add_avgceils_mismatch4(<8 x i16> %a0, <8 x i16> %a1) {
326; CHECK-LABEL: add_avgceils_mismatch4:
327; CHECK:       // %bb.0:
328; CHECK-NEXT:    mvn v0.16b, v0.16b
329; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
330; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
331; CHECK-NEXT:    ret
; Negative test: both adds are nsw but the shift amount is 2, not 1, so this
; is not a rounding average — per CHECK mvn/sub plus sshr #2 is emitted.
332  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
333  %add = add nsw <8 x i16> %a1, %add0
334  %avg = ashr <8 x i16> %add, splat(i16 2)
335  ret <8 x i16> %avg
336}
337
338define <8 x i16> @add_avgceilu_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
339; CHECK-LABEL: add_avgceilu_mismatch:
340; CHECK:       // %bb.0:
341; CHECK-NEXT:    movi v2.8h, #1
342; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
343; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
344; CHECK-NEXT:    ushr v0.8h, v0.8h, #2
345; CHECK-NEXT:    ret
; Negative test: both adds are nuw but the shift amount is 2, not 1, so this
; is not a rounding average — per CHECK plain adds plus ushr #2 are emitted.
346  %add0 = add nuw <8 x i16> %a1, %a0
347  %add = add nuw <8 x i16> %add0, splat(i16 1)
348  %avg = lshr <8 x i16> %add, splat(i16 2)
349  ret <8 x i16> %avg
350}
351