xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll (revision f6ace2bc15bfde4cc9bd140859fa92618568a006)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64
3; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6
; None of the llc invocations above pass -mtriple, so this triple selects the
; target for every run.
7target triple = "aarch64-unknown-linux-gnu"
8
; Reduce <4 x half> %a into %start with llvm.vector.reduce.fadd (the call
; carries no fast-math flags). The autogenerated assertions below pin three
; different lowerings, one per llc invocation:
;  * -mattr=+sme-fa64: a single FADDA governed by a 4-lane .h predicate
;    (ptrue p0.h, vl4).
;  * -mattr=+sve: four scalar half-precision fadd instructions that accumulate
;    lanes 0..3 in order into h0, with lanes 1-3 extracted by indexed mov.
;  * no -mattr: the vector is spilled to the stack, each lane is reloaded,
;    widened to single precision and added, and the running sum is narrowed
;    back to half after every step.
9define half @fadda_v4f16(half %start, <4 x half> %a) {
10; FA64-LABEL: fadda_v4f16:
11; FA64:       // %bb.0:
12; FA64-NEXT:    ptrue p0.h, vl4
13; FA64-NEXT:    // kill: def $h0 killed $h0 def $z0
14; FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
15; FA64-NEXT:    fadda h0, p0, h0, z1.h
16; FA64-NEXT:    // kill: def $h0 killed $h0 killed $z0
17; FA64-NEXT:    ret
18;
19; NO-FA64-LABEL: fadda_v4f16:
20; NO-FA64:       // %bb.0:
21; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
22; NO-FA64-NEXT:    fadd h0, h0, h1
23; NO-FA64-NEXT:    mov z2.h, z1.h[1]
24; NO-FA64-NEXT:    fadd h0, h0, h2
25; NO-FA64-NEXT:    mov z2.h, z1.h[2]
26; NO-FA64-NEXT:    mov z1.h, z1.h[3]
27; NO-FA64-NEXT:    fadd h0, h0, h2
28; NO-FA64-NEXT:    fadd h0, h0, h1
29; NO-FA64-NEXT:    ret
30;
31; NONEON-NOSVE-LABEL: fadda_v4f16:
32; NONEON-NOSVE:       // %bb.0:
33; NONEON-NOSVE-NEXT:    sub sp, sp, #16
34; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
35; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
36; NONEON-NOSVE-NEXT:    fcvt s0, h0
37; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
38; NONEON-NOSVE-NEXT:    fcvt s1, h1
39; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
40; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
41; NONEON-NOSVE-NEXT:    fcvt s1, h1
42; NONEON-NOSVE-NEXT:    fcvt h0, s0
43; NONEON-NOSVE-NEXT:    fcvt s0, h0
44; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
45; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
46; NONEON-NOSVE-NEXT:    fcvt s1, h1
47; NONEON-NOSVE-NEXT:    fcvt h0, s0
48; NONEON-NOSVE-NEXT:    fcvt s0, h0
49; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
50; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
51; NONEON-NOSVE-NEXT:    fcvt s1, h1
52; NONEON-NOSVE-NEXT:    fcvt h0, s0
53; NONEON-NOSVE-NEXT:    fcvt s0, h0
54; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
55; NONEON-NOSVE-NEXT:    fcvt h0, s0
56; NONEON-NOSVE-NEXT:    add sp, sp, #16
57; NONEON-NOSVE-NEXT:    ret
58  %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
59  ret half %res
60}
61
; Declaration of the reduction intrinsic called by @fadda_v4f16.
62declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
63