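; Check that widen/add/shift/truncate averaging patterns and the corresponding
; NEON intrinsics are lowered to the (u|s)hadd and (u|s)rhadd instructions.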

define <8 x i16> @haddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @haddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @haddu_const_both() {
; CHECK-LABEL: haddu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @haddu_const_bothhigh() {
; CHECK-LABEL: haddu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #1
; CHECK-NEXT:    ret
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @haddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: haddu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
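; Same patterns expressed directly through the llvm.aarch64.neon.uhadd intrinsic.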
define <8 x i16> @haddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: haddu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: haddu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}
define <8 x i16> @haddu_i_const_both() {
; CHECK-LABEL: haddu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_const_bothhigh() {
; CHECK-LABEL: haddu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @haddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: haddu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
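; Signed variants: sext + add + ashr + trunc becomes shadd.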
define <8 x i16> @hadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, %zextsrc2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @hadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @hadds_const_both() {
; CHECK-LABEL: hadds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @hadds_const_bothhigh() {
; CHECK-LABEL: hadds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32766
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add = add <8 x i32> %ext1, %ext2
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @hadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: hadds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-NEXT:    shrn v0.4h, v1.4s, #1
; CHECK-NEXT:    shrn2 v0.8h, v2.4s, #1
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add = add <8 x i32> %zextsrc2, %zextsrc1
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
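; Signed intrinsic variants via llvm.aarch64.neon.shadd.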
define <8 x i16> @hadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: hadds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: hadds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}
define <8 x i16> @hadds_i_const_both() {
; CHECK-LABEL: hadds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_const_bothhigh() {
; CHECK-LABEL: hadds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32766
; CHECK-NEXT:    dup v0.8h, w8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @hadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: hadds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
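; (a | b) - ((a ^ b) >> 1) is equivalent to a rounding halving add.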
define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: sub_fixedwidth_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}

define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_fixedwidth_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %or = or <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = sub <8 x i16> %or, %srl
  ret <8 x i16> %res
}
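; Rounding unsigned variants: the extra +1 before the shift selects urhadd.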
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhaddu_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_const_both() {
; CHECK-LABEL: rhaddu_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhaddu_const_bothhigh() {
; CHECK-LABEL: rhaddu_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ret
  %ext1 = zext <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534> to <8 x i32>
  %ext2 = zext <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhaddu_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = zext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
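; Rounding intrinsic variants via llvm.aarch64.neon.urhadd.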
define <8 x i16> @rhaddu_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhaddu_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}
define <8 x i16> @rhaddu_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_both() {
; CHECK-LABEL: rhaddu_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_const_bothhigh() {
; CHECK-LABEL: rhaddu_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>, <8 x i16> <i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535, i16 65535>)
  ret <8 x i16> %result
}

define <8 x i16> @rhaddu_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhaddu_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
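; Rounding signed variants become srhadd.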
define <8 x i16> @rhadds_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> %src2 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, %zextsrc2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> %zextsrc1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %add1 = add <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_const_both() {
; CHECK-LABEL: rhadds_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %add1 = add <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_const_bothhigh() {
; CHECK-LABEL: rhadds_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %ext1 = sext <8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766> to <8 x i32>
  %ext2 = sext <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> to <8 x i32>
  %add1 = add <8 x i32> %ext1, %ext2
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_undef(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
  %zextsrc2 = sext <8 x i16> undef to <8 x i32>
  %add1 = add <8 x i32> %zextsrc2, %zextsrc1
  %add = add <8 x i32> %add1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %resulti16 = ashr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %result = trunc <8 x i32> %resulti16 to <8 x i16>
  ret <8 x i16> %result
}
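; Rounding signed intrinsic variants via llvm.aarch64.neon.srhadd.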
define <8 x i16> @rhadds_i_base(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: rhadds_i_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> %src2)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %src1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_lhs(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %src1)
  ret <8 x i16> %result
}
define <8 x i16> @rhadds_i_const_zero(<8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_const_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %src1)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_both() {
; CHECK-LABEL: rhadds_i_const_both:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #2
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_const_bothhigh() {
; CHECK-LABEL: rhadds_i_const_bothhigh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>, <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>)
  ret <8 x i16> %result
}

define <8 x i16> @rhadds_i_undef(<8 x i16> %t, <8 x i16> %src1) {
; CHECK-LABEL: rhadds_i_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %result = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> undef, <8 x i16> %src1)
  ret <8 x i16> %result
}
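; hadd(x, x) and rhadd(x, x) simplify to x, so only the ret remains.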
define <8 x i8> @shadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: shadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <8 x i16> @shadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: shadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <8 x i8> @uhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: uhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <8 x i16> @uhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: uhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <8 x i8> @srhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: srhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <8 x i16> @srhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: srhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}

define <8 x i8> @urhadd_v8i8(<8 x i8> %x) {
; CHECK-LABEL: urhadd_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %x, <8 x i8> %x)
  ret <8 x i8> %r
}

define <8 x i16> @urhadd_v8i16(<8 x i16> %x) {
; CHECK-LABEL: urhadd_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = tail call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %x, <8 x i16> %x)
  ret <8 x i16> %r
}
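; (a & b) + ((a ^ b) >> 1) is equivalent to a non-rounding halving add.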
define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_fixedwidth_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = lshr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}

define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_fixedwidth_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %and = and <8 x i16> %a0, %a1
  %xor = xor <8 x i16> %a0, %a1
  %srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %res = add <8 x i16> %and, %srl
  ret <8 x i16> %res
}
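; Splat operands: check demanded-elements simplification through the intrinsics.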
define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: shadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: srhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: uhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}

define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: urhadd_demandedelts:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
  %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1)
  %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %r0
}
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)