xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll (revision 365aa1574a1b4a3cdee6648227d095d00536ffde)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
3
; test_add_zext_v8i16: zero-extends two <8 x i8> arguments to <8 x i16>, adds
; them lane-wise, and reduces the sum with llvm.vector.reduce.add to one i16.
; The expected assembly below moves the low 64 bits of v1 into the upper half
; of v0 and issues a single "uaddlv h0, v0.16b" across all sixteen byte lanes,
; then copies halfword lane 0 to w0 with umov.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm that is intentional.
4define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0 {
5; CHECK-LABEL: test_add_zext_v8i16:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
8; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
9; CHECK-NEXT:    mov v0.d[1], v1.d[0]
10; CHECK-NEXT:    uaddlv h0, v0.16b
11; CHECK-NEXT:    umov w0, v0.h[0]
12; CHECK-NEXT:    ret
; IR under test: zext(%a) + zext(%b), then an add-reduction over the 8 lanes.
13  %z1 = zext <8 x i8> %a to <8 x i16>
14  %z2 = zext <8 x i8> %b to <8 x i16>
15  %z = add <8 x i16> %z1, %z2
16  %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %z)
17  ret i16 %r
18}
19
; test_add_zext_v4i32: zero-extends two <4 x i16> arguments to <4 x i32>, adds
; them lane-wise, and reduces the sum with llvm.vector.reduce.add to one i32.
; The expected assembly below moves the low 64 bits of v1 into the upper half
; of v0 and issues a single "uaddlv s0, v0.8h" across all eight halfword
; lanes, then copies the scalar result to w0 with fmov.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm that is intentional.
20define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #0 {
21; CHECK-LABEL: test_add_zext_v4i32:
22; CHECK:       // %bb.0:
23; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
24; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
25; CHECK-NEXT:    mov v0.d[1], v1.d[0]
26; CHECK-NEXT:    uaddlv s0, v0.8h
27; CHECK-NEXT:    fmov w0, s0
28; CHECK-NEXT:    ret
; IR under test: zext(%a) + zext(%b), then an add-reduction over the 4 lanes.
29  %z1 = zext <4 x i16> %a to <4 x i32>
30  %z2 = zext <4 x i16> %b to <4 x i32>
31  %z = add <4 x i32> %z1, %z2
32  %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %z)
33  ret i32 %r
34}
35
; test_add_zext_v2i64: zero-extends two <2 x i32> arguments to <2 x i64>, adds
; them lane-wise, and reduces the sum with llvm.vector.reduce.add to one i64.
; The expected assembly below moves the low 64 bits of v1 into the upper half
; of v0 and issues a single "uaddlv d0, v0.4s" across all four word lanes,
; then copies the scalar result to x0 with fmov.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm that is intentional.
36define i64 @test_add_zext_v2i64(<2 x i32> %a, <2 x i32> %b) local_unnamed_addr #0 {
37; CHECK-LABEL: test_add_zext_v2i64:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
40; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
41; CHECK-NEXT:    mov v0.d[1], v1.d[0]
42; CHECK-NEXT:    uaddlv d0, v0.4s
43; CHECK-NEXT:    fmov x0, d0
44; CHECK-NEXT:    ret
; IR under test: zext(%a) + zext(%b), then an add-reduction over the 2 lanes.
45  %z1 = zext <2 x i32> %a to <2 x i64>
46  %z2 = zext <2 x i32> %b to <2 x i64>
47  %z = add <2 x i64> %z1, %z2
48  %r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %z)
49  ret i64 %r
50}
51
; Declarations of the llvm.vector.reduce.add intrinsic overloads called by the
; test functions in this file (one per result type: i16, i32 and i64).
52declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
53declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
54declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
55