; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr=armv8.2-a,neon,fullfp16 -target-abi=aapcs-gnu -float-abi hard -o - %s | FileCheck %s

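; Check that bitcasts to and from fp16 vectors on this big-endian target are
; lowered with the expected VREV lane-reversal instructions.
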
; 64-bit conversions to v4f16
define void @conv_i64_to_v4f16( i64 %val, ptr %store ) {
; CHECK-LABEL: conv_i64_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov d16, r1, r0
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vrev64.16 d17, d17
; CHECK-NEXT:    vadd.f16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r2]
; CHECK-NEXT:    bx lr
entry:
  %v = bitcast i64 %val to <4 x half>
  %w = load <4 x half>, ptr %store
  %a = fadd <4 x half> %v, %w
  store <4 x half> %a, ptr %store
  ret void
}

define void @conv_f64_to_v4f16( double %val, ptr %store ) {
; CHECK-LABEL: conv_f64_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
entry:
  %v = bitcast double %val to <4 x half>
  %w = load <4 x half>, ptr %store
  %a = fadd <4 x half> %v, %w
  store <4 x half> %a, ptr %store
  ret void
}

define void @conv_v2f32_to_v4f16( <2 x float> %a, ptr %store ) {
; CHECK-LABEL: conv_v2f32_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI2_0
; CHECK-NEXT:    vrev64.32 d17, d0
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vadd.f32 d16, d17, d16
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vrev64.16 d17, d17
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI2_0:
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
entry:
  %c = fadd <2 x float> %a, <float -1.0, float 1.0>
  %v = bitcast <2 x float> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v2i32_to_v4f16( <2 x i32> %a, ptr %store ) {
; CHECK-LABEL: conv_v2i32_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI3_0
; CHECK-NEXT:    vrev64.32 d17, d0
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vadd.i32 d16, d17, d16
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vrev64.16 d17, d18
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI3_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
entry:
  %c = add <2 x i32> %a, <i32 1, i32 -1>
  %v = bitcast <2 x i32> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-LABEL: conv_v4i16_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vrev64.16 d18, d0
; CHECK-NEXT:    vadd.i16 d16, d18, d16
; CHECK-NEXT:    vrev64.16 d17, d17
; CHECK-NEXT:    vadd.f16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
entry:
  %c = add <4 x i16> %a, <i16 -1, i16 0, i16 0, i16 -1>
  %v = bitcast <4 x i16> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v8i8_to_v4f16( <8 x i8> %a, ptr %store ) {
; CHECK-LABEL: conv_v8i8_to_v4f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i8 d16, #0x1
; CHECK-NEXT:    vrev64.8 d17, d0
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vrev64.16 d17, d18
; CHECK-NEXT:    vrev16.8 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
entry:
  %c = add <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %v = bitcast <8 x i8> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

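; 128-bit conversions to v8f16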
define void @conv_v2i64_to_v8f16( <2 x i64> %val, ptr %store ) {
; CHECK-LABEL: conv_v2i64_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    adr r1, .LCPI6_0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vadd.i64 q9, q0, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vrev64.16 q9, q9
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI6_0:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
entry:
  %v = add <2 x i64> %val, <i64 1, i64 -1>
  %v1 = bitcast <2 x i64> %v to <8 x half>
  %w = load <8 x half>, ptr %store
  %a = fadd <8 x half> %v1, %w
  store <8 x half> %a, ptr %store
  ret void
}

define void @conv_v2f64_to_v8f16( <2 x double> %val, ptr %store ) {
; CHECK-LABEL: conv_v2f64_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d16, #-1.000000e+00
; CHECK-NEXT:    vmov.f64 d17, #1.000000e+00
; CHECK-NEXT:    vadd.f64 d19, d1, d16
; CHECK-NEXT:    vadd.f64 d18, d0, d17
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vrev64.16 q9, q9
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %v = fadd <2 x double> %val, <double 1.0, double -1.0>
  %v1 = bitcast <2 x double> %v to <8 x half>
  %w = load <8 x half>, ptr %store
  %a = fadd <8 x half> %v1, %w
  store <8 x half> %a, ptr %store
  ret void
}

define void @conv_v4f32_to_v8f16( <4 x float> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f32_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI8_0
; CHECK-NEXT:    vrev64.32 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vrev64.16 q9, q9
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q8, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI8_0:
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
entry:
  %c = fadd <4 x float> %a, <float -1.0, float 1.0, float -1.0, float 1.0>
  %v = bitcast <4 x float> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v4i32_to_v8f16( <4 x i32> %a, ptr %store ) {
; CHECK-LABEL: conv_v4i32_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI9_0
; CHECK-NEXT:    vrev64.32 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vrev64.16 q9, q10
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q8, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 1 @ 0x1
entry:
  %c = add <4 x i32> %a, <i32 -1, i32 1, i32 -1, i32 1>
  %v = bitcast <4 x i32> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v8i16_to_v8f16( <8 x i16> %a, ptr %store ) {
; CHECK-LABEL: conv_v8i16_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI10_0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.16 q10, q0
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vrev64.16 q9, q9
; CHECK-NEXT:    vadd.i16 q8, q10, q8
; CHECK-NEXT:    vadd.f16 q8, q8, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI10_0:
; CHECK-NEXT:    .short 65535 @ 0xffff
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 7 @ 0x7
; CHECK-NEXT:    .short 65535 @ 0xffff
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 7 @ 0x7
entry:
  %c = add <8 x i16> %a, <i16 -1, i16 1, i16 0, i16 7, i16 -1, i16 1, i16 0, i16 7>
  %v = bitcast <8 x i16> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v16i8_to_v8f16( <16 x i8> %a, ptr %store ) {
; CHECK-LABEL: conv_v16i8_to_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.8 q8, q0
; CHECK-NEXT:    vmov.i8 q9, #0x1
; CHECK-NEXT:    vadd.i8 q8, q8, q9
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vrev64.16 q9, q10
; CHECK-NEXT:    vrev16.8 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q8, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %c = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %v = bitcast <16 x i8> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

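; 64-bit conversions from v4f16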
define void @conv_v4f16_to_i64( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI12_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vmov r1, r2, d16
; CHECK-NEXT:    subs r1, r1, #1
; CHECK-NEXT:    sbc r2, r2, #0
; CHECK-NEXT:    str r2, [r0]
; CHECK-NEXT:    str r1, [r0, #4]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI12_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to i64
  %w = add i64 %y, -1
  store i64 %w, ptr %store
  ret void
}

define void @conv_v4f16_to_f64( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI13_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vmov.f64 d17, #-1.000000e+00
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f64 d16, d16, d17
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI13_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to double
  %w = fadd double %y, -1.0
  store double %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v2i32( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI14_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vldr d17, .LCPI14_1
; CHECK-NEXT:    vrev64.32 d17, d17
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vadd.i32 d16, d16, d17
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI14_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI14_1:
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 1 @ 0x1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <2 x i32>
  %w = add <2 x i32> %y, <i32 -1, i32 1>
  store <2 x i32> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v2f32( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI15_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vldr d17, .LCPI15_1
; CHECK-NEXT:    vrev64.32 d17, d17
; CHECK-NEXT:    vrev32.16 d16, d16
; CHECK-NEXT:    vadd.f32 d16, d16, d17
; CHECK-NEXT:    vrev64.32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI15_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI15_1:
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <2 x float>
  %w = fadd <2 x float> %y, <float -1.0, float 1.0>
  store <2 x float> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v4i16( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI16_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vldr d17, .LCPI16_1
; CHECK-NEXT:    vrev64.16 d17, d17
; CHECK-NEXT:    vadd.i16 d16, d16, d17
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI16_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI16_1:
; CHECK-NEXT:    .short 65535 @ 0xffff
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 7 @ 0x7
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <4 x i16>
  %w = add <4 x i16> %y, <i16 -1, i16 1, i16 0, i16 7>
  store <4 x i16> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v8f8( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v8f8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr d16, .LCPI17_0
; CHECK-NEXT:    vrev64.16 d17, d0
; CHECK-NEXT:    vrev64.16 d16, d16
; CHECK-NEXT:    vadd.f16 d16, d17, d16
; CHECK-NEXT:    vmov.i8 d17, #0x1
; CHECK-NEXT:    vrev16.8 d16, d16
; CHECK-NEXT:    vadd.i8 d16, d16, d17
; CHECK-NEXT:    vrev64.8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI17_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <8 x i8>
  %w = add <8 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  store <8 x i8> %w, ptr %store
  ret void
}

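; 128-bit conversions from v8f16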
define void @conv_v8f16_to_i128( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_i128:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    adr r1, .LCPI18_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vmov r12, r2, d17
; CHECK-NEXT:    vmov r3, r1, d16
; CHECK-NEXT:    subs lr, r2, #1
; CHECK-NEXT:    sbcs r2, r12, #0
; CHECK-NEXT:    sbcs r1, r1, #0
; CHECK-NEXT:    sbc r3, r3, #0
; CHECK-NEXT:    str r3, [r0]
; CHECK-NEXT:    stmib r0, {r1, r2, lr}
; CHECK-NEXT:    pop {r11, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI18_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to i128
  %w = add i128 %y, -1
  store i128 %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v2f64( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v2f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI19_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vmov.f64 d18, #1.000000e+00
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vmov.f64 d19, #-1.000000e+00
; CHECK-NEXT:    vadd.f64 d21, d17, d18
; CHECK-NEXT:    vadd.f64 d20, d16, d19
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI19_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <2 x double>
  %w = fadd <2 x double> %y, <double -1.0, double 1.0>
  store <2 x double> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v4i32( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI20_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    adr r1, .LCPI20_1
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vrev64.32 q9, q9
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vadd.i32 q8, q8, q9
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI20_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI20_1:
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
; CHECK-NEXT:    .long 1 @ 0x1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <4 x i32>
  %w = add <4 x i32> %y, <i32 -1, i32 1, i32 -1, i32 1>
  store <4 x i32> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v4f32( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI21_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    adr r1, .LCPI21_1
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vrev64.32 q9, q9
; CHECK-NEXT:    vrev32.16 q8, q8
; CHECK-NEXT:    vadd.f32 q8, q8, q9
; CHECK-NEXT:    vrev64.32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI21_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI21_1:
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
; CHECK-NEXT:    .long 0xbf800000 @ float -1
; CHECK-NEXT:    .long 0x3f800000 @ float 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <4 x float>
  %w = fadd <4 x float> %y, <float -1.0, float 1.0, float -1.0, float 1.0>
  store <4 x float> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v8i16( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI22_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    adr r1, .LCPI22_1
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vrev64.16 q9, q9
; CHECK-NEXT:    vadd.i16 q8, q8, q9
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI22_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:  .LCPI22_1:
; CHECK-NEXT:    .short 65535 @ 0xffff
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 7 @ 0x7
; CHECK-NEXT:    .short 65535 @ 0xffff
; CHECK-NEXT:    .short 1 @ 0x1
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 7 @ 0x7
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <8 x i16>
  %w = add <8 x i16> %y, <i16 -1, i16 1, i16 0, i16 7, i16 -1, i16 1, i16 0, i16 7>
  store <8 x i16> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v8f8( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v8f8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI23_0
; CHECK-NEXT:    vrev64.16 q9, q0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vrev64.16 q8, q8
; CHECK-NEXT:    vadd.f16 q8, q9, q8
; CHECK-NEXT:    vmov.i8 q9, #0x1
; CHECK-NEXT:    vrev16.8 q8, q8
; CHECK-NEXT:    vadd.i8 q8, q8, q9
; CHECK-NEXT:    vrev64.8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI23_0:
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
; CHECK-NEXT:    .short 0xbc00 @ half -1
; CHECK-NEXT:    .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <16 x i8>
  %w = add <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  store <16 x i8> %w, ptr %store
  ret void
}