; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s

; Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
; RUN: not --crash llc -mattr=+sve -force-streaming-compatible < %s

target triple = "aarch64-linux-gnu"

; FADD

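; For the unpacked <vscale x 2 x half> and <vscale x 4 x half> cases the
; elements sit one per 64-bit and 32-bit container respectively, so the FADDA
; is predicated with ptrue p0.d / ptrue p0.s rather than ptrue p0.h.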
define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {
; CHECK-LABEL: fadda_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv2f16(half %init, <vscale x 2 x half> %a)
  ret half %res
}

define half @fadda_nxv4f16(half %init, <vscale x 4 x half> %a) {
; CHECK-LABEL: fadda_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv4f16(half %init, <vscale x 4 x half> %a)
  ret half %res
}

define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) {
; CHECK-LABEL: fadda_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv8f16(half %init, <vscale x 8 x half> %a)
  ret half %res
}

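; <vscale x 6 x half> is widened via the stack: the input is stored, the
; trailing padding lanes are overwritten with -0.0 (0x8000), the identity for
; ordered fadd, and the reload is reduced with a full nxv8f16 FADDA.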
define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv6f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    st1h { z2.d }, p1, [sp, #3, mul vl]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [sp]
; CHECK-NEXT:    fadda h0, p0, h0, z2.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
  ret half %res
}

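; <vscale x 10 x half> is split: the first nxv8f16 part is reduced directly,
; then the remaining lanes are padded out to nxv8f16 with -0.0 on the stack
; before a second FADDA accumulates into the same scalar.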
define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv10f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    st1h { z1.h }, p0, [sp]
; CHECK-NEXT:    mov z0.h, w8
; CHECK-NEXT:    addvl x8, sp, #1
; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #6, mul vl]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p1, [x8, #7, mul vl]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    fmov s0, s2
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
  ret half %res
}

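; <vscale x 12 x half> follows the same split, but the remaining nxv4f16 part
; can be repacked in registers (uunpklo/uzp1 against a -0.0 splat), so the
; second FADDA needs no stack traffic.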
define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv12f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    uunpklo z0.s, z1.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    fmov s0, s2
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
  ret half %res
}

define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) {
; CHECK-LABEL: fadda_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    fadda s0, p0, s0, z1.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.nxv2f32(float %init, <vscale x 2 x float> %a)
  ret float %res
}

define float @fadda_nxv4f32(float %init, <vscale x 4 x float> %a) {
; CHECK-LABEL: fadda_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    fadda s0, p0, s0, z1.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %init, <vscale x 4 x float> %a)
  ret float %res
}

define double @fadda_nxv2f64(double %init, <vscale x 2 x double> %a) {
; CHECK-LABEL: fadda_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fadda d0, p0, d0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call double @llvm.vector.reduce.fadd.nxv2f64(double %init, <vscale x 2 x double> %a)
  ret double %res
}

declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>)
declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)