xref: /llvm-project/llvm/test/CodeGen/AArch64/faddp-half.ll (revision edc1c3d24e6f8ed548340ce0369138fb40427a24)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
3; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16
4
; Pairwise add of the two lanes of a <2 x half>: lane 1 is shuffled down to
; lane 0, added to %a, and lane 0 of the sum (a[0] + a[1]) is returned.
; CHECK (the +fullfp16 RUN line) expects this to become a single scalar
; `faddp h0, v0.2h`.
; CHECKNOFP16 (the RUN line without +fullfp16) expects no faddp: lane 1 is
; broadcast with dup, both operands are widened with fcvtl, added as .4s,
; and the result is narrowed back with fcvtn.
5define half @faddp_2xhalf(<2 x half> %a) {
6; CHECK-LABEL: faddp_2xhalf:
7; CHECK:       // %bb.0: // %entry
8; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
9; CHECK-NEXT:    faddp h0, v0.2h
10; CHECK-NEXT:    ret
11;
12; CHECKNOFP16-LABEL: faddp_2xhalf:
13; CHECKNOFP16:       // %bb.0: // %entry
14; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
15; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
16; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
17; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
18; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
19; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
20; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
21; CHECKNOFP16-NEXT:    ret
22entry:
23  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
24  %0 = fadd <2 x half> %a, %shift
25  %1 = extractelement <2 x half> %0, i32 0
26  ret half %1
27}
28
; Same pattern as @faddp_2xhalf, but with the fadd operands swapped
; (%shift + %a instead of %a + %shift).
; CHECK (+fullfp16) still expects a single `faddp h0, v0.2h`, i.e. the
; pairwise-add match must not depend on operand order.
; CHECKNOFP16 expects the widened dup/fcvtl/fadd/fcvtn sequence, with the
; fadd source operands in the swapped order (v1, v0).
29define half @faddp_2xhalf_commute(<2 x half> %a) {
30; CHECK-LABEL: faddp_2xhalf_commute:
31; CHECK:       // %bb.0: // %entry
32; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
33; CHECK-NEXT:    faddp h0, v0.2h
34; CHECK-NEXT:    ret
35;
36; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
37; CHECKNOFP16:       // %bb.0: // %entry
38; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
39; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
40; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
41; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
42; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
43; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
44; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
45; CHECKNOFP16-NEXT:    ret
46entry:
47  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
48  %0 = fadd <2 x half> %shift, %a
49  %1 = extractelement <2 x half> %0, i32 0
50  ret half %1
51}
52
; Pairwise add of lanes 0 and 1 of a <4 x half>: the shuffle moves lane 1 to
; lane 0 (all other mask elements are undef), and only lane 0 of the vector
; sum is extracted and returned.
; CHECK (+fullfp16) expects the scalar `faddp h0, v0.2h`, which reads only
; the low two half lanes.
; CHECKNOFP16 expects dup of lane 1, fcvtl widening of both operands, a .4s
; fadd, and fcvtn narrowing.
53define half @faddp_4xhalf(<4 x half> %a) {
54; CHECK-LABEL: faddp_4xhalf:
55; CHECK:       // %bb.0: // %entry
56; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
57; CHECK-NEXT:    faddp h0, v0.2h
58; CHECK-NEXT:    ret
59;
60; CHECKNOFP16-LABEL: faddp_4xhalf:
61; CHECKNOFP16:       // %bb.0: // %entry
62; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
63; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
64; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
65; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
66; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
67; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
68; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
69; CHECKNOFP16-NEXT:    ret
70entry:
71  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
72  %0 = fadd <4 x half> %a, %shift
73  %1 = extractelement <4 x half> %0, i32 0
74  ret half %1
75}
76
; Same pattern as @faddp_4xhalf, but with the fadd operands swapped
; (%shift + %a instead of %a + %shift).
; CHECK (+fullfp16) still expects a single `faddp h0, v0.2h`.
; CHECKNOFP16 expects the widened dup/fcvtl/fadd/fcvtn sequence, with the
; fadd source operands in the swapped order (v1, v0).
77define half @faddp_4xhalf_commute(<4 x half> %a) {
78; CHECK-LABEL: faddp_4xhalf_commute:
79; CHECK:       // %bb.0: // %entry
80; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
81; CHECK-NEXT:    faddp h0, v0.2h
82; CHECK-NEXT:    ret
83;
84; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
85; CHECKNOFP16:       // %bb.0: // %entry
86; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
87; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
88; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
89; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
90; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
91; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
92; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
93; CHECKNOFP16-NEXT:    ret
94entry:
95  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
96  %0 = fadd <4 x half> %shift, %a
97  %1 = extractelement <4 x half> %0, i32 0
98  ret half %1
99}
100
; Pairwise add of lanes 0 and 1 of an <8 x half>: the shuffle moves lane 1 to
; lane 0 (all other mask elements are undef), and only lane 0 of the vector
; sum is extracted and returned.
; CHECK (+fullfp16) expects the scalar `faddp h0, v0.2h`.
; CHECKNOFP16 expects the whole 8-lane add to be carried out in single
; precision: lane 1 is broadcast with dup, both halves of each operand are
; widened (fcvtl/fcvtl2), two .4s fadds are emitted, and the results are
; narrowed back with fcvtn/fcvtn2, even though only lane 0 is returned.
101define half @faddp_8xhalf(<8 x half> %a) {
102; CHECK-LABEL: faddp_8xhalf:
103; CHECK:       // %bb.0: // %entry
104; CHECK-NEXT:    faddp h0, v0.2h
105; CHECK-NEXT:    ret
106;
107; CHECKNOFP16-LABEL: faddp_8xhalf:
108; CHECKNOFP16:       // %bb.0: // %entry
109; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
110; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
111; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
112; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
113; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
114; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v3.4s
115; CHECKNOFP16-NEXT:    fadd v1.4s, v0.4s, v1.4s
116; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
117; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
118; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
119; CHECKNOFP16-NEXT:    ret
120entry:
121  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
122  %0 = fadd <8 x half> %a, %shift
123  %1 = extractelement <8 x half> %0, i32 0
124  ret half %1
125}
126
; Same pattern as @faddp_8xhalf, but with the fadd operands swapped
; (%shift + %a instead of %a + %shift).
; CHECK (+fullfp16) still expects a single `faddp h0, v0.2h`.
; CHECKNOFP16 expects the widened dup/fcvtl/fcvtl2/fadd/fcvtn/fcvtn2
; sequence, with the source operands of both fadds in the swapped order.
127define half @faddp_8xhalf_commute(<8 x half> %a) {
128; CHECK-LABEL: faddp_8xhalf_commute:
129; CHECK:       // %bb.0: // %entry
130; CHECK-NEXT:    faddp h0, v0.2h
131; CHECK-NEXT:    ret
132;
133; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
134; CHECKNOFP16:       // %bb.0: // %entry
135; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
136; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
137; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
138; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
139; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
140; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
141; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
142; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
143; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
144; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
145; CHECKNOFP16-NEXT:    ret
146entry:
147  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
148  %0 = fadd <8 x half> %shift, %a
149  %1 = extractelement <8 x half> %0, i32 0
150  ret half %1
151}
152
; Full-vector variant: the shuffle swaps the two lanes of every adjacent pair
; (mask 1,0,3,2,5,4,7,6) and the swapped vector is added to %a with the
; `reassoc` fast-math flag, so both lanes of each pair end up holding that
; pair's sum. The whole <8 x half> is returned rather than one lane.
; CHECK (+fullfp16) expects the lane swap as rev32 followed by a vector
; `fadd v0.8h`; no faddp is expected for this single-register case.
; CHECKNOFP16 expects rev32 followed by fcvtl/fcvtl2 widening, two .4s fadds,
; and fcvtn/fcvtn2 narrowing.
153define <8 x half> @addp_v8f16(<8 x half> %a) {
154; CHECK-LABEL: addp_v8f16:
155; CHECK:       // %bb.0: // %entry
156; CHECK-NEXT:    rev32 v1.8h, v0.8h
157; CHECK-NEXT:    fadd v0.8h, v1.8h, v0.8h
158; CHECK-NEXT:    ret
159;
160; CHECKNOFP16-LABEL: addp_v8f16:
161; CHECKNOFP16:       // %bb.0: // %entry
162; CHECKNOFP16-NEXT:    rev32 v1.8h, v0.8h
163; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
164; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
165; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
166; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
167; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
168; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
169; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
170; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
171; CHECKNOFP16-NEXT:    ret
172entry:
173  %s = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
174  %b = fadd reassoc <8 x half> %s, %a
175  ret <8 x half> %b
176}
177
; 16-lane version of the adjacent-pair swap + `fadd reassoc` pattern: the
; shuffle mask swaps lanes within every pair (1,0,3,2,...,15,14) and the
; swapped vector is added to %a. The input and result each occupy two
; 128-bit registers (v0 and v1 in the expected output).
; CHECK (+fullfp16) expects one vector `faddp v1.8h, v0.8h, v1.8h` to compute
; all eight pairwise sums, then zip1/zip2 of that result with itself to place
; each sum into both lanes of its pair across the two output registers.
; CHECKNOFP16 expects no faddp: each input register is lane-swapped with
; rev32, both operands are widened (fcvtl/fcvtl2), added as .4s, and
; narrowed back (fcvtn/fcvtn2).
178define <16 x half> @addp_v16f16(<16 x half> %a) {
179; CHECK-LABEL: addp_v16f16:
180; CHECK:       // %bb.0: // %entry
181; CHECK-NEXT:    faddp v1.8h, v0.8h, v1.8h
182; CHECK-NEXT:    zip1 v0.8h, v1.8h, v1.8h
183; CHECK-NEXT:    zip2 v1.8h, v1.8h, v1.8h
184; CHECK-NEXT:    ret
185;
186; CHECKNOFP16-LABEL: addp_v16f16:
187; CHECKNOFP16:       // %bb.0: // %entry
188; CHECKNOFP16-NEXT:    rev32 v2.8h, v0.8h
189; CHECKNOFP16-NEXT:    rev32 v3.8h, v1.8h
190; CHECKNOFP16-NEXT:    fcvtl v4.4s, v0.4h
191; CHECKNOFP16-NEXT:    fcvtl v6.4s, v1.4h
192; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
193; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
194; CHECKNOFP16-NEXT:    fcvtl v5.4s, v2.4h
195; CHECKNOFP16-NEXT:    fcvtl v7.4s, v3.4h
196; CHECKNOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
197; CHECKNOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
198; CHECKNOFP16-NEXT:    fadd v4.4s, v5.4s, v4.4s
199; CHECKNOFP16-NEXT:    fadd v5.4s, v7.4s, v6.4s
200; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v0.4s
201; CHECKNOFP16-NEXT:    fadd v3.4s, v3.4s, v1.4s
202; CHECKNOFP16-NEXT:    fcvtn v0.4h, v4.4s
203; CHECKNOFP16-NEXT:    fcvtn v1.4h, v5.4s
204; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
205; CHECKNOFP16-NEXT:    fcvtn2 v1.8h, v3.4s
206; CHECKNOFP16-NEXT:    ret
207entry:
208  %s = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
209  %b = fadd reassoc <16 x half> %s, %a
210  ret <16 x half> %b
211}
212