; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s
; Origin: llvm-project/llvm/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
3
; Aggregates holding a pair of half vectors (2 x <4 x half> and 2 x <8 x half>).
; No use of either type is visible in this part of the file.
%struct.float16x4x2_t = type { [2 x <4 x half>] }
%struct.float16x8x2_t = type { [2 x <8 x half>] }
6
; llvm.fabs on <4 x half> should select one D-register vabs.f16.
define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
; CHECK-LABEL: test_vabs_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabs.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  ret <4 x half> %vabs1.i
}
17
; llvm.fabs on <8 x half> should select one Q-register vabs.f16.
define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vabsq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabs.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}
28
; Ordered-equal compare of <4 x half> against zero, sign-extended to i16
; lanes, should select the #0 immediate form of vceq.f16.
define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vceqz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <4 x half> %a, zeroinitializer
  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vceqz.i
}
40
; Ordered-equal compare of <8 x half> against zero, sign-extended to i16
; lanes, should select the #0 immediate form of vceq.f16 on a Q register.
define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vceqzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <8 x half> %a, zeroinitializer
  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vceqz.i
}
52
; Ordered >= compare of <4 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcge.f16.
define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgez_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <4 x half> %a, zeroinitializer
  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgez.i
}
64
; Ordered >= compare of <8 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcge.f16 on a Q register.
define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgezq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <8 x half> %a, zeroinitializer
  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgez.i
}
76
; Ordered > compare of <4 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcgt.f16.
define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgtz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <4 x half> %a, zeroinitializer
  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgtz.i
}
88
; Ordered > compare of <8 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcgt.f16 on a Q register.
define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgtzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <8 x half> %a, zeroinitializer
  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgtz.i
}
100
; Ordered <= compare of <4 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcle.f16.
define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vclez_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcle.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <4 x half> %a, zeroinitializer
  %vclez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vclez.i
}
112
; Ordered <= compare of <8 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vcle.f16 on a Q register.
define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vclezq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcle.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <8 x half> %a, zeroinitializer
  %vclez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vclez.i
}
124
; Ordered < compare of <4 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vclt.f16.
define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcltz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vclt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <4 x half> %a, zeroinitializer
  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcltz.i
}
136
; Ordered < compare of <8 x half> against zero, sign-extended to i16 lanes,
; should select the #0 immediate form of vclt.f16 on a Q register.
define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcltzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vclt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <8 x half> %a, zeroinitializer
  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcltz.i
}
148
; sitofp from <4 x i16> to <4 x half> should select vcvt.f16.s16 on a D register.
define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %vcvt.i
}
158
; sitofp from <8 x i16> to <8 x half> should select vcvt.f16.s16 on a Q register.
define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %vcvt.i
}
168
; uitofp from <4 x i16> to <4 x half> should select vcvt.f16.u16 on a D register.
define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %vcvt.i
}
178
; uitofp from <8 x i16> to <8 x half> should select vcvt.f16.u16 on a Q register.
define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %vcvt.i
}
188
; fptosi from <4 x half> to <4 x i16> should select vcvt.s16.f16 on a D register.
define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptosi <4 x half> %a to <4 x i16>
  ret <4 x i16> %vcvt.i
}
198
; fptosi from <8 x half> to <8 x i16> should select vcvt.s16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptosi <8 x half> %a to <8 x i16>
  ret <8 x i16> %vcvt.i
}
208
; fptoui from <4 x half> to <4 x i16> should select vcvt.u16.f16 on a D register.
define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptoui <4 x half> %a to <4 x i16>
  ret <4 x i16> %vcvt.i
}
218
; fptoui from <8 x half> to <8 x i16> should select vcvt.u16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptoui <8 x half> %a to <8 x i16>
  ret <8 x i16> %vcvt.i
}
228
; llvm.arm.neon.vcvtas on <4 x half> should select vcvta.s16.f16 on a D register.
define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_s16_v1.i
}
238
; llvm.arm.neon.vcvtau on <4 x half> should select vcvta.u16.f16 on a D register.
define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_u16_v1.i
}
248
; llvm.arm.neon.vcvtas on <8 x half> should select vcvta.s16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtaq_s16_v1.i
}
258
; llvm.arm.neon.vcvtms on <4 x half> should select vcvtm.s16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_s16_v1.i
}
268
; llvm.arm.neon.vcvtms on <8 x half> should select vcvtm.s16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_s16_v1.i
}
278
; llvm.arm.neon.vcvtmu on <4 x half> should select vcvtm.u16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_u16_v1.i
}
288
; llvm.arm.neon.vcvtmu on <8 x half> should select vcvtm.u16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_u16_v1.i
}
298
; llvm.arm.neon.vcvtns on <4 x half> should select vcvtn.s16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_s16_v1.i
}
308
; llvm.arm.neon.vcvtns on <8 x half> should select vcvtn.s16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_s16_v1.i
}
318
; llvm.arm.neon.vcvtnu on <4 x half> should select vcvtn.u16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_u16_v1.i
}
328
; llvm.arm.neon.vcvtnu on <8 x half> should select vcvtn.u16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_u16_v1.i
}
338
; llvm.arm.neon.vcvtps on <4 x half> should select vcvtp.s16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_s16_v1.i
}
348
; llvm.arm.neon.vcvtps on <8 x half> should select vcvtp.s16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_s16_v1.i
}
358
; llvm.arm.neon.vcvtpu on <4 x half> should select vcvtp.u16.f16 on a D register.
define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_u16_v1.i
}
368
; llvm.arm.neon.vcvtpu on <8 x half> should select vcvtp.u16.f16 on a Q register.
define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_u16_v1.i
}
378
; Subtracting %a from a splat of 0xH8000 (half -0.0) is the negation idiom
; and should select one D-register vneg.f16.
define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
; CHECK-LABEL: test_vneg_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <4 x half> %sub.i
}
389
; Subtracting %a from a splat of 0xH8000 (half -0.0) is the negation idiom
; and should select one Q-register vneg.f16.
define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vnegq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <8 x half> %sub.i
}
400
; llvm.arm.neon.vrecpe on <4 x half> should select vrecpe.f16 on a D register.
define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrecpe_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecpe.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
  ret <4 x half> %vrecpe_v1.i
}
411
; llvm.arm.neon.vrecpe on <8 x half> should select vrecpe.f16 on a Q register.
define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrecpeq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecpe.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
  ret <8 x half> %vrecpeq_v1.i
}
422
; llvm.arm.neon.vrintz on <4 x half> should select vrintz.f16 on a D register.
define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintz.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
  ret <4 x half> %vrnd_v1.i
}
433
; llvm.arm.neon.vrintz on <8 x half> should select vrintz.f16 on a Q register.
define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintz.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
  ret <8 x half> %vrndq_v1.i
}
444
; llvm.arm.neon.vrinta on <4 x half> should select vrinta.f16 on a D register.
define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnda_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrinta.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
  ret <4 x half> %vrnda_v1.i
}
455
; llvm.arm.neon.vrinta on <8 x half> should select vrinta.f16 on a Q register.
define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndaq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrinta.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
  ret <8 x half> %vrndaq_v1.i
}
466
; llvm.arm.neon.vrintm on <4 x half> should select vrintm.f16 on a D register.
define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintm.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
  ret <4 x half> %vrndm_v1.i
}
477
; llvm.arm.neon.vrintm on <8 x half> should select vrintm.f16 on a Q register.
define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndmq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintm.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
  ret <8 x half> %vrndmq_v1.i
}
488
; llvm.arm.neon.vrintn on <4 x half> should select vrintn.f16 on a D register.
define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndn_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintn.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
  ret <4 x half> %vrndn_v1.i
}
499
; llvm.arm.neon.vrintn on <8 x half> should select vrintn.f16 on a Q register.
define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintn.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
  ret <8 x half> %vrndnq_v1.i
}
510
; llvm.arm.neon.vrintp on <4 x half> should select vrintp.f16 on a D register.
define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndp_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintp.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
  ret <4 x half> %vrndp_v1.i
}
521
; llvm.arm.neon.vrintp on <8 x half> should select vrintp.f16 on a Q register.
define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndpq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintp.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
  ret <8 x half> %vrndpq_v1.i
}
532
; llvm.arm.neon.vrintx on <4 x half> should select vrintx.f16 on a D register.
define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndx_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintx.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
  ret <4 x half> %vrndx_v1.i
}
543
; llvm.arm.neon.vrintx on <8 x half> should select vrintx.f16 on a Q register.
define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndxq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintx.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
  ret <8 x half> %vrndxq_v1.i
}
554
; llvm.arm.neon.vrsqrte on <4 x half> should select vrsqrte.f16 on a D register.
define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrsqrte_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrte.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
  ret <4 x half> %vrsqrte_v1.i
}
565
; llvm.arm.neon.vrsqrte on <8 x half> should select vrsqrte.f16 on a Q register.
define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrsqrteq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrte.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
  ret <8 x half> %vrsqrteq_v1.i
}
576
; fadd of two <4 x half> values should select one D-register vadd.f16.
define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vadd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <4 x half> %a, %b
  ret <4 x half> %add.i
}
587
; fadd of two <8 x half> values should select one Q-register vadd.f16.
define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vaddq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <8 x half> %a, %b
  ret <8 x half> %add.i
}
598
; llvm.arm.neon.vabds on <4 x half> should select vabd.f16 on D registers.
define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vabd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vabd_v2.i
}
609
; llvm.arm.neon.vabds on <8 x half> should select vabd.f16 on Q registers.
define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vabdq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vabdq_v2.i
}
620
; llvm.arm.neon.vacge on (%a, %b) should select vacge.f16 with the operands
; in argument order, on D registers.
define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcage_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcage_v2.i
}
631
; llvm.arm.neon.vacge on (%a, %b) should select vacge.f16 with the operands
; in argument order, on Q registers.
define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcageq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcageq_v2.i
}
642
; llvm.arm.neon.vacgt on (%a, %b) should select vacgt.f16 with the operands
; in argument order, on D registers.
define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcagt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacgt.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcagt_v2.i
}
652
; llvm.arm.neon.vacgt on (%a, %b) should select vacgt.f16 with the operands
; in argument order, on Q registers.
define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcagtq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacgt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcagtq_v2.i
}
662
; The IR calls llvm.arm.neon.vacge with the arguments swapped (%b, %a), so the
; expected vacge.f16 takes d1 before d0.
define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcale_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
  ret <4 x i16> %vcale_v2.i
}
673
; The IR calls llvm.arm.neon.vacge with the arguments swapped (%b, %a), so the
; expected vacge.f16 takes q1 before q0.
define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaleq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
  ret <8 x i16> %vcaleq_v2.i
}
684
; Ordered-equal compare of two <4 x half> values, sign-extended to i16 lanes,
; should select the register form of vceq.f16.
define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vceq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}
696
; Ordered-equal compare of two <8 x half> values, sign-extended to i16 lanes,
; should select the register form of vceq.f16 on Q registers.
define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vceqq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
708
; Ordered >= compare of two <4 x half> values, sign-extended to i16 lanes,
; should select vcge.f16 with the operands in argument order.
define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcge_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}
720
; Ordered >= compare of two <8 x half> values, sign-extended to i16 lanes,
; should select vcge.f16 with the operands in argument order, on Q registers.
define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgeq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
732
; Ordered > compare of two <4 x half> values, sign-extended to i16 lanes,
; should select vcgt.f16 with the operands in argument order.
define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcgt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}
744
; Ordered > compare of two <8 x half> values, sign-extended to i16 lanes,
; should select vcgt.f16 with the operands in argument order, on Q registers.
define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgtq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
756
; Ordered <= compare of %a with %b is expected to be emitted as vcge.f16 with
; the source operands swapped (d1, d0).
define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcle_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}
768
; Ordered <= compare of %a with %b is expected to be emitted as vcge.f16 with
; the source operands swapped (q1, q0).
define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcleq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
780
; Ordered < compare of %a with %b is expected to be emitted as vcgt.f16 with
; the source operands swapped (d1, d0).
define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vclt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}
792
; Ordered < compare of %a with %b is expected to be emitted as vcgt.f16 with
; the source operands swapped (q1, q0).
define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcltq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}
804
; llvm.arm.neon.vcvtfxs2fp on <4 x i16> with i32 2 as its second argument
; should select vcvt.f16.s16 with a #2 immediate, on a D register.
define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2
817
; llvm.arm.neon.vcvtfxs2fp on <8 x i16> with i32 2 as its second argument
; should select vcvt.f16.s16 with a #2 immediate, on a Q register.
define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2
830
; llvm.arm.neon.vcvtfxu2fp on <4 x i16> with i32 2 as its second argument
; should select vcvt.f16.u16 with a #2 immediate, on a D register.
define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2
843
; llvm.arm.neon.vcvtfxu2fp on <8 x i16> with i32 2 as its second argument
; should select vcvt.f16.u16 with a #2 immediate, on a Q register.
define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2
856
; llvm.arm.neon.vcvtfp2fxs on <4 x half> with i32 2 as its second argument
; should select vcvt.s16.f16 with a #2 immediate, on a D register.
define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2
869
; llvm.arm.neon.vcvtfp2fxs on <8 x half> with i32 2 as its second argument
; should select vcvt.s16.f16 with a #2 immediate, on a Q register.
define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2
882
; llvm.arm.neon.vcvtfp2fxu on <4 x half> with i32 2 as its second argument
; should select vcvt.u16.f16 with a #2 immediate, on a D register.
define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2
895
; llvm.arm.neon.vcvtfp2fxu on <8 x half> with i32 2 as its second argument
; should select vcvt.u16.f16 with a #2 immediate, on a Q register.
define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2
908
; llvm.arm.neon.vmaxs on <4 x half> should select vmax.f16 on D registers.
define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmax_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmax.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmax_v2.i
}
919
; llvm.arm.neon.vmaxs on <8 x half> should select vmax.f16 on Q registers.
define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmax.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxq_v2.i
}
930
; llvm.arm.neon.vmaxnm on <4 x half> should select vmaxnm.f16 on D registers.
define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmaxnm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxnm.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmaxnm_v2.i
}
940
; llvm.arm.neon.vmaxnm on <8 x half> should select vmaxnm.f16 on Q registers.
define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxnmq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxnm.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxnmq_v2.i
}
950
; vmins intrinsic on <4 x half>: expected to select one D-register vmin.f16.
define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmin_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmin.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmin_v2.i
}
960
; vmins intrinsic on <8 x half>: expected to select one Q-register vmin.f16.
define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vminq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmin.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vminq_v2.i
}
970
; vminnm intrinsic on <4 x half>: expected to select one D-register vminnm.f16.
define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vminnm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vminnm_v2.i
}
980
; vminnm intrinsic on <8 x half>: expected to select one Q-register vminnm.f16.
define dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vminnmq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vminnmq_v2.i
}
990
; Plain fmul on <4 x half>: expected to select one D-register vmul.f16.
define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmul_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %mul.i = fmul <4 x half> %a, %b
  ret <4 x half> %mul.i
}
1001
; Plain fmul on <8 x half>: expected to select one Q-register vmul.f16.
define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmulq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %mul.i = fmul <8 x half> %a, %b
  ret <8 x half> %mul.i
}
1012
; vpadd intrinsic on <4 x half>: expected to select one D-register vpadd.f16.
define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpadd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpadd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpadd_v2.i
}
1023
; vpmaxs intrinsic on <4 x half>: expected to select one D-register vpmax.f16.
define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmax_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpmax.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmax_v2.i
}
1034
; vpmins intrinsic on <4 x half>: expected to select one D-register vpmin.f16.
define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vpmin_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vpmin.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vpmin_v2.i
}
1045
; vrecps intrinsic on <4 x half>: expected to select one D-register vrecps.f16.
define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrecps_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecps.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrecps_v2.i
}
1056
; vrecps intrinsic on <8 x half>: expected to select one Q-register vrecps.f16.
define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrecpsq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecps.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrecpsq_v2.i
}
1067
; vrsqrts intrinsic on <4 x half>: expected to select one D-register vrsqrts.f16.
define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vrsqrts_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrts.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vrsqrts_v2.i
}
1078
; vrsqrts intrinsic on <8 x half>: expected to select one Q-register vrsqrts.f16.
define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vrsqrtsq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrts.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vrsqrtsq_v2.i
}
1089
; Plain fsub on <4 x half>: expected to select one D-register vsub.f16.
define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vsub_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <4 x half> %a, %b
  ret <4 x half> %sub.i
}
1100
; Plain fsub on <8 x half>: expected to select one Q-register vsub.f16.
define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vsubq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <8 x half> %a, %b
  ret <8 x half> %sub.i
}
1111
; llvm.fma(%b, %c, %a) on <4 x half>: the addend %a is the accumulator, so the
; expected output is vfma.f16 accumulating into d0.
define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfma_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f16 d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
  ret <4 x half> %0
}
1121
; llvm.fma(%b, %c, %a) on <8 x half>: the addend %a is the accumulator, so the
; expected output is vfma.f16 accumulating into q0.
define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmaq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
  ret <8 x half> %0
}
1131
; fma whose first multiplicand is (-0.0 - %b), i.e. a negated %b (0xH8000 is
; half -0.0). The recorded output is a separate vneg.f16 followed by vfma.f16.
define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vfms_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 d16, d1
; CHECK-NEXT:    vfma.f16 d0, d16, d2
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
  ret <4 x half> %0
}
1143
; 8-lane variant: fma whose first multiplicand is (-0.0 - %b). The recorded
; output is a separate vneg.f16 followed by vfma.f16 on Q registers.
define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vfmsq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 q8, q1
; CHECK-NEXT:    vfma.f16 q0, q8, q2
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
  ret <8 x half> %0
}
1155
; Multiply by a splat of lane 3 of %b: the splat is expected to fold into the
; by-lane form of vmul.f16 (d1[3]).
define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmul_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.f16 d0, d0, d1[3]
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul = fmul <4 x half> %shuffle, %a
  ret <4 x half> %mul
}
1166
; Multiply <8 x half> by an 8-wide splat of lane 3 of the 4-lane %b: expected to
; fold into the by-lane form of vmul.f16 on a Q register (d2[3]).
define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmulq_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d2 killed $d2 def $q1
; CHECK-NEXT:    vmul.f16 q0, q0, d2[3]
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  %mul = fmul <8 x half> %shuffle, %a
  ret <8 x half> %mul
}
1178
; Multiply by a scalar half that arrives in the low 16 bits of a float argument:
; the value is extracted, splatted across 4 lanes and multiplied. Expected to
; fold into the by-lane vmul.f16 using lane 0 of the incoming register.
define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) {
; CHECK-LABEL: test_vmul_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s2 killed $s2 def $d1
; CHECK-NEXT:    vmul.f16 d0, d0, d1[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %b.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <4 x half> undef, half %1, i32 0
  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  %mul = fmul <4 x half> %vecinit4, %a
  ret <4 x half> %mul
}
1194
; 8-lane variant: the scalar half in the low 16 bits of a float argument is
; splatted and multiplied. Expected to fold into the by-lane vmul.f16 on a Q
; register using lane 0 of the incoming register.
define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) {
; CHECK-LABEL: test_vmulq_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s4 killed $s4 def $d2
; CHECK-NEXT:    vmul.f16 q0, q0, d2[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %b.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <8 x half> undef, half %1, i32 0
  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  %mul = fmul <8 x half> %vecinit8, %a
  ret <8 x half> %mul
}
1210
; Bitwise select on half vectors routed through the <8 x i8> vbsl intrinsic:
; the bitcasts are expected to vanish, leaving a single D-register vbsl.
define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_vbsl_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <4 x i16> %a to <8 x i8>
  %1 = bitcast <4 x half> %b to <8 x i8>
  %2 = bitcast <4 x half> %c to <8 x i8>
  %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2)
  %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half>
  ret <4 x half> %3
}
1225
; Bitwise select on half vectors routed through the <16 x i8> vbsl intrinsic:
; the bitcasts are expected to vanish, leaving a single Q-register vbsl.
define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_vbslq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast <8 x i16> %a to <16 x i8>
  %1 = bitcast <8 x half> %b to <16 x i8>
  %2 = bitcast <8 x half> %c to <16 x i8>
  %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half>
  ret <8 x half> %3
}
1240
; Two interleaving shuffles (lanes 0,4,1,5 and 2,6,3,7) returned as a pair:
; expected to combine into a single in-place vzip.16 on D registers.
define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vzip_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}
1253
; 8-lane variant: the two interleaving shuffles returned as a pair are expected
; to combine into a single in-place vzip.16 on Q registers.
define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vzipq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}
1266
; Even-lane and odd-lane de-interleaving shuffles returned as a pair: expected
; to combine into a single in-place vuzp.16 on D registers.
define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vuzp_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vuzp.16 d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}
1279
; 8-lane variant: even-lane and odd-lane de-interleaving shuffles returned as a
; pair are expected to combine into a single in-place vuzp.16 on Q registers.
define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vuzpq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vuzp.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}
1292
; Transposing shuffles (lanes 0,4,2,6 and 1,5,3,7) returned as a pair: expected
; to combine into a single in-place vtrn.16 on D registers.
define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vtrn_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
  ret %struct.float16x4x2_t %.fca.0.1.insert
}
1305
; 8-lane variant: the two transposing shuffles returned as a pair are expected
; to combine into a single in-place vtrn.16 on Q registers.
define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vtrnq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
  ret %struct.float16x8x2_t %.fca.0.1.insert
}
1318
; Splat of a scalar half carried in the low 16 bits of a float argument:
; expected to select a D-register vdup.16 from lane 0 of the incoming register.
define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
; CHECK-LABEL: test_vmov_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    vdup.16 d0, d0[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <4 x half> undef, half %1, i32 0
  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %vecinit4
}
1333
; 8-lane splat of a scalar half carried in the low 16 bits of a float argument:
; expected to select a Q-register vdup.16 from lane 0 of the incoming register.
define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) {
; CHECK-LABEL: test_vmovq_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    vdup.16 q0, d0[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <8 x half> undef, half %1, i32 0
  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %vecinit8
}
1348
; Same IR shape as test_vmov_n_f16 under the vdup_n name: a scalar-half splat
; expected to select a D-register vdup.16 from lane 0.
define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) {
; CHECK-LABEL: test_vdup_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    vdup.16 d0, d0[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <4 x half> undef, half %1, i32 0
  %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %vecinit4
}
1363
; Same IR shape as test_vmovq_n_f16 under the vdupq_n name: a scalar-half splat
; expected to select a Q-register vdup.16 from lane 0.
define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) {
; CHECK-LABEL: test_vdupq_n_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $s0 killed $s0 def $d0
; CHECK-NEXT:    vdup.16 q0, d0[0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %vecinit = insertelement <8 x half> undef, half %1, i32 0
  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %vecinit8
}
1378
; Splat of lane 3 of a <4 x half>: expected to select vdup.16 d0, d0[3].
define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) {
; CHECK-LABEL: test_vdup_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 d0, d0[3]
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x half> %shuffle
}
1388
; 8-wide splat of lane 3 of a <4 x half>: expected to select vdup.16 q0, d0[3].
define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) {
; CHECK-LABEL: test_vdupq_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    vdup.16 q0, d0[3]
; CHECK-NEXT:    bx lr
entry:
  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x half> %shuffle
}
1399
; Shuffle selecting consecutive lanes 2..5 of the %a:%b concatenation:
; expected to select vext.16 with immediate #2 on D registers.
define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vext_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vext.16 d0, d0, d1, #2
; CHECK-NEXT:    bx lr
entry:
  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x half> %vext
}
1409
; Shuffle selecting consecutive lanes 5..12 of the %a:%b concatenation:
; expected to select vext.16 with immediate #5 on Q registers.
define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vextq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vext.16 q0, q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
  ret <8 x half> %vext
}
1419
; Full lane reversal of a <4 x half> (3,2,1,0): expected to select vrev64.16
; on a D register.
define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrev64_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x half> %shuffle.i
}
1429
; Lane reversal within each 4-lane half of a <8 x half> (3,2,1,0,7,6,5,4):
; expected to select vrev64.16 on a Q register.
define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrev64q_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x half> %shuffle.i
}
1439
; A scalar half load inserted into all four lanes: expected to fold into one
; load-and-duplicate vld1.16 {d0[]} with 16-bit address alignment.
define <4 x half> @test_vld_dup1_4xhalf(ptr %b) {
; CHECK-LABEL: test_vld_dup1_4xhalf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld1.16 {d0[]}, [r0:16]
; CHECK-NEXT:    bx lr
entry:
  %b1 = load half, ptr %b, align 2
  %vecinit = insertelement <4 x half> undef, half %b1, i32 0
  %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
  %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
  %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
  ret <4 x half> %vecinit4
}
1454
; A scalar half load splatted across eight lanes: expected to fold into one
; load-and-duplicate vld1.16 {d0[], d1[]} with 16-bit address alignment.
define <8 x half> @test_vld_dup1_8xhalf(ptr %b) local_unnamed_addr {
; CHECK-LABEL: test_vld_dup1_8xhalf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld1.16 {d0[], d1[]}, [r0:16]
; CHECK-NEXT:    bx lr
entry:
  %b1 = load half, ptr %b, align 2
  %vecinit = insertelement <8 x half> undef, half %b1, i32 0
  %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %vecinit8
}
1467
; Concatenation of %a with itself via an identity 0..7 shuffle mask: expected to
; need only a copy of d0 into d1 (the upper half of q0).
define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
; CHECK-LABEL: test_shufflevector8xhalf:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    vmov.f64 d1, d0
; CHECK-NEXT:    bx lr
entry:
  %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %r
}
1479
; Declarations of the LLVM / ARM NEON intrinsics referenced by the tests.

; Unary operations and conversions.
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>)
declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>)
declare <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>)
declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>)

; Binary operations and comparisons.
declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>)
declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>)
declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>)
declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>)
declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>)
declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>)
declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>)
declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>)
declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>)

; Ternary operations.
declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

; Single-lane structure loads and stores.
declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr, <8 x half>, <8 x half>, i32, i32)
declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr, <4 x half>, <4 x half>, i32, i32)
declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32)
declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32)
declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
declare void @llvm.arm.neon.vst2lane.p0.v8f16(ptr, <8 x half>, <8 x half>, i32, i32)
declare void @llvm.arm.neon.vst2lane.p0.v4f16(ptr, <4 x half>, <4 x half>, i32, i32)
declare void @llvm.arm.neon.vst3lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32)
declare void @llvm.arm.neon.vst3lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32)
declare void @llvm.arm.neon.vst4lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32)
declare void @llvm.arm.neon.vst4lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32)
1550
; vld2lane on two <8 x half> vectors with trailing operands i32 7, i32 2:
; expected to select vld2.16 into lane [3] of d1 and d3.
define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(ptr, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vld2q_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vld2.16 {d1[3], d3[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %3 = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2)
  ret { <8 x half>, <8 x half> } %3
}
1562
; vld2lane on two <4 x half> vectors with trailing operands i32 3, i32 2:
; expected to select vld2.16 into lane [3] of d0 and d1.
define { <4 x half>, <4 x half> } @test_vld2_lane_f16(ptr, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld2_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0 def $q0
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0 def $q0
; CHECK-NEXT:    vld2.16 {d0[3], d1[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %3 = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
  ret { <4 x half>, <4 x half> } %3
}
1574
; vld3lane on three <8 x half> vectors with trailing operands i32 7, i32 2:
; expected to select vld3.16 into lane [3] of d1, d3 and d5.
define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vld3q_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    vld3.16 {d1[3], d3[3], d5[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %4 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2)
  ret { <8 x half>, <8 x half>, <8 x half> } %4
}
1587
; vld3lane on three <4 x half> vectors with trailing operands i32 3, i32 2:
; expected to select vld3.16 into lane [3] of d0, d1 and d2.
define { <4 x half>, <4 x half>, <4 x half> } @test_vld3_lane_f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld3_lane_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vld3.16 {d0[3], d1[3], d2[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %4 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2)
  ret { <4 x half>, <4 x half>, <4 x half> } %4
}
; vld4lane on four <8 x half> vectors with trailing operands i32 7, i32 2:
; expected to select vld4.16 into lane [3] of d1, d3, d5 and d7.
define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vld4lane_v8f16_p0i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %5 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2)
  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %5
}
; vld4lane on four <4 x half> vectors with trailing operands i32 3, i32 2:
; expected to select vld4.16 into lane [3] of d0, d1, d2 and d3.
define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld4lane_v4f16_p0i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %5 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2)
  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %5
}
; vst2lane on two <8 x half> vectors with trailing operands i32 0, i32 1:
; expected to select vst2.16 from lane [0] of d0 and d2 with no alignment hint.
define void @test_vst2lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst2lane_p0i8_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vst2.16 {d0[0], d2[0]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1)
  ret void
}
; Two-vector lane store from <4 x half> operands. The call passes 0 for both
; trailing i32 operands; the expected vst2.16 stores element [0] of d0/d1 and
; carries a :32 qualifier on the r0 address.
define void @test_vst2lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst2lane_p0i8_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0 def $q0
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0 def $q0
; CHECK-NEXT:    vst2.16 {d0[0], d1[0]}, [r0:32]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, i32 0, i32 0)
  ret void
}
; Three-vector lane store from <8 x half> operands. The call passes 0 for both
; trailing i32 operands; the expected vst3.16 stores element [0] of d0/d2/d4
; with no alignment qualifier on [r0].
define void @test_vst3lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst3lane_p0i8_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    vst3.16 {d0[0], d2[0], d4[0]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0)
  ret void
}
; Three-vector lane store from <4 x half> operands. The call passes 0 for both
; trailing i32 operands; the expected vst3.16 stores element [0] of d0/d1/d2
; with no alignment qualifier on [r0].
define void @test_vst3lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst3lane_p0i8_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vst3.16 {d0[0], d1[0], d2[0]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0)
  ret void
}
; Four-vector lane store from <8 x half> operands. The call passes 0 for both
; trailing i32 operands; the expected vst4.16 stores element [0] of
; d0/d2/d4/d6 and carries a :64 qualifier on the r0 address.
define void @test_vst4lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst4lane_p0i8_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT:    vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0)
  ret void
}
; Four-vector lane store from <4 x half> operands. The call passes 0 for both
; trailing i32 operands; the expected vst4.16 stores element [0] of d0-d3 and
; carries a :64 qualifier on the r0 address.
define void @test_vst4lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst4lane_p0i8_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT:    vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64]
; CHECK-NEXT:    bx lr
entry:
  tail call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0)
  ret void
}
1698