xref: /llvm-project/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16  | FileCheck %s

; Intrinsics exercised by the tests in this file: the AArch64 NEON fmulx
; intrinsic and the generic fma intrinsic, each in scalar half, <4 x half>
; and <8 x half> form.
; NOTE(review): attribute group #1 on @llvm.fma.f16 is not defined in the
; visible part of this file -- confirm whether it is an intentional leftover.
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare half @llvm.fma.f16(half, half, half) #1

; <4 x half> fma(%b, splat(%c[0]), %a), where %c is a 64-bit vector.
; The assertions expect the splat to fold into a single by-element fmla.
; %lane is unused.
define <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half> fma(%b, splat(%c[0]), %a), where %c is a 64-bit vector that the
; shuffle widens to eight lanes. The assertions expect one by-element fmla.
; %lane is unused.
define <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half> fma(splat(%c[0]), %b, %a), where %c is a 128-bit vector and the
; splat is the first multiplicand. The assertions expect one by-element fmla.
; %lane is unused.
define <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfma_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half> fma(splat(%c[0]), %b, %a), where %c is a 128-bit vector and the
; splat is the first multiplicand. The assertions expect one by-element fmla.
; %lane is unused.
define <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmaq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> fma(%b, splat(%c), %a), where the scalar %c is inserted into
; lane 0 and then broadcast. The assertions expect one by-element fmla
; reading lane 0 of the register that holds %c.
define <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfma_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> fma(%b, splat(%c), %a), where the scalar %c is inserted into
; lane 0 and then broadcast. The assertions expect one by-element fmla
; reading lane 0 of the register that holds %c.
define <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmaq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar half fma(%b, %c[0], %a) with %c a 64-bit vector. Because the lane
; is 0, the assertions expect a plain scalar fmadd rather than the
; by-element form. %lane is unused.
define half @t_vfmah_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(%c[0], %b, %a): same as t_vfmah_lane_f16_0 but with the
; extracted lane as the first multiplicand. The assertions expect a scalar
; fmadd with the multiplicand registers in swapped order. %lane is unused.
define half @t_vfmah_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmadd h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
  ret half %0
}

; Scalar half fma(%b, %c[3], %a) with %c a 64-bit vector. The assertions
; expect the lane extract to fold into a scalar by-element fmla.
; %lane is unused.
define half @t_vfmah_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_lane_f16_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(%c[0], %c[3], %a): both multiplicands come from the same
; 64-bit vector. The assertions expect one by-element fmla that reads lane 0
; as the scalar register h1 and lane 3 as v1.h[3].
define half @t_vfmah_lane_f16_3_0(half %a, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane_f16_3_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmla h0, h1, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %b = extractelement <4 x half> %c, i32 0
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(%b[0], %c[0], %a): lane 0 of two different 64-bit vectors.
; The assertions expect a plain scalar fmadd on the low halves of both
; vector registers.
define half @t_vfmah_lane_f16_0_0(half %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane_f16_0_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %b0 = extractelement <4 x half> %b, i32 0
  %c0 = extractelement <4 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b0, half %c0, half %a)
  ret half %0
}

; Scalar half fma(%b, %c[0], %a) with %c a 128-bit vector. Because the lane
; is 0, the assertions expect a plain scalar fmadd. %lane is unused.
define half @t_vfmah_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmadd h0, h1, h2, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(%c[0], %b, %a) with %c a 128-bit vector and the extracted
; lane as the first multiplicand. The assertions expect a scalar fmadd with
; the multiplicand registers in swapped order. %lane is unused.
define half @t_vfmah_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmadd h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 0
  %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
  ret half %0
}

; Scalar half fma(%b, %c[7], %a) with %c a 128-bit vector. The assertions
; expect the lane extract to fold into a scalar by-element fmla.
; %lane is unused.
define half @t_vfmah_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmah_laneq_f16_7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; <4 x half> fma(-%b, splat(%c[0]), %a), where %c is a 64-bit vector and the
; negation is written as a subtraction from -0.0 (0xH8000). The assertions
; expect negation and splat to fold into one by-element fmls. %lane is unused.
define <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
  %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
  ret <4 x half> %fmla3
}

; <8 x half> fma(-%b, splat(%c[0]), %a), where %c is a 64-bit vector and the
; negation is written as a subtraction from -0.0 (0xH8000). The assertions
; expect one by-element fmls. %lane is unused.
define <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
  %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
  ret <8 x half> %fmla3
}

; <4 x half> fma(splat(%c[0]), -%b, %a), where %c is a 128-bit vector, the
; splat is the first multiplicand, and the negation is written as a
; subtraction from -0.0. The assertions expect one by-element fmls.
; %lane is unused.
define <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfms_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
  ret <4 x half> %0
}

; <8 x half> fma(splat(%c[0]), -%b, %a), where %c is a 128-bit vector, the
; splat is the first multiplicand, and the negation is written as a
; subtraction from -0.0. The assertions expect one by-element fmls.
; %lane is unused.
define <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
  ret <8 x half> %0
}

; <4 x half> fma(-%b, splat(%c), %a), where the scalar %c is inserted into
; lane 0 and broadcast, and the negation is written as a subtraction from
; -0.0. The assertions expect one by-element fmls.
define <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
; CHECK-LABEL: t_vfms_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
  ret <4 x half> %0
}

; <8 x half> fma(-%b, splat(%c), %a), where the scalar %c is inserted into
; lane 0 and broadcast, and the negation is written as a subtraction from
; -0.0. The assertions expect one by-element fmls.
define <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
; CHECK-LABEL: t_vfmsq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
; CHECK-NEXT:    ret
entry:
  %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
  ret <8 x half> %0
}

; Scalar half fma(-%b, %c[0], %a) with %c a 64-bit vector; the negation is
; written as a subtraction from -0.0 (0xH8000). Because the lane is 0, the
; assertions expect a plain scalar fmsub. %lane is unused.
define half @t_vfmsh_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(%c[0], -%b, %a): same as t_vfmsh_lane_f16_0 but with the
; extracted lane as the first multiplicand. The assertions expect the same
; scalar fmsub as the unswapped variant. %lane is unused.
define half @t_vfmsh_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
  ret half %1
}

; Scalar half fma(-%b, %c[3], %a) with %c a 64-bit vector; the negation is
; written as a subtraction from -0.0. The assertions expect negation and
; lane extract to fold into a scalar by-element fmls. %lane is unused.
define half @t_vfmsh_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(-%b, %c[0], %a) with %c a 128-bit vector; the negation is
; written as a subtraction from -0.0. Because the lane is 0, the assertions
; expect a plain scalar fmsub. %lane is unused.
define half @t_vfmsh_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_0:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(-%c[0], %c[3], %a): both multiplicands come from the same
; 64-bit vector, with lane 0 negated via a subtraction from -0.0. The
; assertions expect one by-element fmls that reads lane 0 as h1 and lane 3
; as v1.h[3]. %lane is unused.
define half @t_vfmsh_lane_f16_0_3(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_lane_f16_0_3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmls h0, h1, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %b = extractelement <4 x half> %c, i32 0
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(%c[0], -%b, %a) with %c a 128-bit vector and the extracted
; lane as the first multiplicand. The assertions expect the same scalar
; fmsub as the unswapped variant. %lane is unused.
define half @t_vfmsh_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_0_swap:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmsub h0, h2, h1, h0
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 0
  %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
  ret half %1
}

; Scalar half fma(-%b, %c[7], %a) with %c a 128-bit vector; the negation is
; written as a subtraction from -0.0. The assertions expect a scalar
; by-element fmls. %lane is unused.
define half @t_vfmsh_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vfmsh_laneq_f16_7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; <4 x half> splat(%b[0]) * %a, where %b is a 128-bit vector. The assertions
; expect the splat to fold into one by-element fmul. %lane is unused.
define <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmul_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x half> %shuffle, %a
  ret <4 x half> %mul
}

; <8 x half> splat(%b[0]) * %a, where %b is a 128-bit vector. The assertions
; expect the splat to fold into one by-element fmul. %lane is unused.
define <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %mul = fmul <8 x half> %shuffle, %a
  ret <8 x half> %mul
}

; Scalar half %c[0] * %a with %c a 64-bit vector. Because the lane is 0, the
; assertions expect a plain scalar fmul. %lane is unused.
define half @t_vmulh_lane0_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar half %c[3] * %a with %c a 64-bit vector. The assertions expect the
; lane extract to fold into a scalar by-element fmul. %lane is unused.
define half @t_vmulh_lane3_f16(half %a, <4 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmul h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <4 x half> %c, i32 3
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar half %c[0] * %a with %c a 128-bit vector. Because the lane is 0,
; the assertions expect a plain scalar fmul. %lane is unused.
define half @t_vmulh_laneq0_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 0
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar half %c[7] * %a with %c a 128-bit vector. The assertions expect the
; lane extract to fold into a scalar by-element fmul. %lane is unused.
define half @t_vmulh_laneq7_f16(half %a, <8 x half> %c, i32 %lane) {
; CHECK-LABEL: t_vmulh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmul h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <8 x half> %c, i32 7
  %1 = fmul half %0, %a
  ret half %1
}

; Scalar fmulx intrinsic applied to two half arguments. The assertions
; expect a single scalar fmulx instruction.
define half @t_vmulx_f16(half %a, half %b) {
; CHECK-LABEL: t_vmulx_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
  ret half %fmulx.i
}

; Scalar fmulx(%a, %b[0]) with %b a 64-bit vector. Because the lane is 0,
; the assertions expect the plain scalar fmulx form.
define half @t_vmulxh_lane0_f16(half %a, <4 x half> %b) {
; CHECK-LABEL: t_vmulxh_lane0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 0
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; Scalar fmulx(%a, %b[3]) with %b a 64-bit vector. The assertions expect the
; lane extract to fold into a scalar by-element fmulx. %lane is unused.
define half @t_vmulxh_lane3_f16(half %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %b, i32 3
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> fmulx(%a, splat(%b[0])) with %b a 64-bit vector. The assertions
; expect the splat to fold into one by-element fmulx. %lane is unused.
define <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx(%a, splat(%b[0])) with %b a 64-bit vector that the
; shuffle widens to eight lanes. The assertions expect one by-element fmulx.
; %lane is unused.
define <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_lane_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; <4 x half> fmulx(%a, splat(%b[0])) with %b a 128-bit vector. The
; assertions expect one by-element fmulx. %lane is unused.
define <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulx_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx(%a, splat(%b[0])) with %b a 128-bit vector. The
; assertions expect one by-element fmulx. %lane is unused.
define <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxq_laneq_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
; CHECK-NEXT:    ret
entry:
  %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
  ret <8 x half> %vmulx2.i
}

; Scalar fmulx(%a, %b[0]) with %b a 128-bit vector. Because the lane is 0,
; the assertions expect the plain scalar fmulx form.
define half @t_vmulxh_laneq0_f16(half %a, <8 x half> %b) {
; CHECK-LABEL: t_vmulxh_laneq0_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, h1
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 0
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; Scalar fmulx(%a, %b[7]) with %b a 128-bit vector. The assertions expect
; the lane extract to fold into a scalar by-element fmulx. %lane is unused.
define half @t_vmulxh_laneq7_f16(half %a, <8 x half> %b, i32 %lane) {
; CHECK-LABEL: t_vmulxh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmulx h0, h0, v1.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %b, i32 7
  %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
  ret half %fmulx.i
}

; <4 x half> fmulx(%a, splat(%c)), where the scalar %c is inserted into
; lane 0 and broadcast. Unlike the lane variants, the assertions here expect
; an explicit dup followed by a plain vector fmulx.
define <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
; CHECK-LABEL: t_vmulx_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.4h, v1.h[0]
; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <4 x half> undef, half %c, i32 0
  %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4
  ret <4 x half> %vmulx2.i
}

; <8 x half> fmulx(%a, splat(%c)), where the scalar %c is inserted into
; lane 0 and broadcast. Unlike the lane variants, the assertions here expect
; an explicit dup followed by a plain vector fmulx.
define <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
; CHECK-LABEL: t_vmulxq_n_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    dup v1.8h, v1.h[0]
; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %vecinit = insertelement <8 x half> undef, half %c, i32 0
  %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4
  ret <8 x half> %vmulx2.i
}

; Scalar half fma(%b, %c[3], %a) with %c a 64-bit vector and no extra lane
; parameter. The assertions expect a scalar by-element fmla.
define half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmah_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <4 x half> %c, i32 3
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(%b, %c[7], %a) with %c a 128-bit vector and no extra lane
; parameter. The assertions expect a scalar by-element fmla.
define half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmah_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmla h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %extract = extractelement <8 x half> %c, i32 7
  %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %0
}

; Scalar half fma(-%b, %c[3], %a) with %c a 64-bit vector and no extra lane
; parameter; the negation is written as a subtraction from -0.0 (0xH8000).
; The assertions expect a scalar by-element fmls.
define half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
; CHECK-LABEL: t_vfmsh_lane3_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    fmls h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <4 x half> %c, i32 3
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(-%b, %c[7], %a) with %c a 128-bit vector and no extra lane
; parameter; the negation is written as a subtraction from -0.0 (0xH8000).
; The assertions expect a scalar by-element fmls.
define half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
; CHECK-LABEL: t_vfmsh_laneq7_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmls h0, h1, v2.h[7]
; CHECK-NEXT:    ret
entry:
  %0 = fsub half 0xH8000, %b
  %extract = extractelement <8 x half> %c, i32 7
  %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
  ret half %1
}

; Scalar half fma(%b, (%c + %d)[3], %a): the lane is extracted from the
; result of a vector fadd rather than from an argument. The assertions
; expect the vector fadd followed by a scalar by-element fmla on its result.
define half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: t_fadd_vfmah_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fadd v2.4h, v2.4h, v3.4h
; CHECK-NEXT:    fmla h0, h1, v2.h[3]
; CHECK-NEXT:    ret
entry:
  %0 = fadd <4 x half> %c, %d
  %extract = extractelement <4 x half> %0, i32 3
  %1 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
  ret half %1
}

; Scalar fmulx(%v[0], %v[1]): both operands are lanes of the same
; <2 x half> argument. The assertions expect one by-element fmulx that reads
; lane 0 as h0 and lane 1 as v0.h[1].
define half @test_fmulx_horizontal_f16(<2 x half> %v) {
; CHECK-LABEL: test_fmulx_horizontal_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmulx h0, h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  %0 = extractelement <2 x half> %v, i32 0
  %1 = extractelement <2 x half> %v, i32 1
  %2 = call half @llvm.aarch64.neon.fmulx.f16(half %0, half %1)
  ret half %2
}
