; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD
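; This test covers bitcasts between scalar and vector types in callees on
; big-endian ARM: lane reordering is done with VREV instructions, and the
; SOFT/HARD prefixes check the soft- and hard-float ABIs respectively.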

define i64 @test_i64_f64(double %p) {
; SOFT-LABEL: test_i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define i64 @test_i64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_i64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define i64 @test_i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define i64 @test_i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define i64 @test_i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define i64 @test_i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r0, r2, d16
; SOFT-NEXT:    adds r1, r0, r0
; SOFT-NEXT:    adc r0, r2, r2
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vmov r0, r2, d16
; HARD-NEXT:    adds r1, r0, r0
; HARD-NEXT:    adc r0, r2, r2
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to i64
    %3 = add i64 %2, %2
    ret i64 %3
}

define double @test_f64_i64(i64 %p) {
; SOFT-LABEL: test_f64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define double @test_f64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_f64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define double @test_f64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_f64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define double @test_f64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_f64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define double @test_f64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_f64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define double @test_f64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_f64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_f64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to double
    %3 = fadd double %2, %2
    ret double %3
}

define <1 x i64> @test_v1i64_i64(i64 %p) {
; SOFT-LABEL: test_v1i64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_f64(double %p) {
; SOFT-LABEL: test_v1i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v1i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v1i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v1i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v1i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v1i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to <1 x i64>
    %3 = add <1 x i64> %2, %2
    ret <1 x i64> %3
}

define <2 x float> @test_v2f32_i64(i64 %p) {
; SOFT-LABEL: test_v2f32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x float> @test_v2f32_f64(double %p) {
; SOFT-LABEL: test_v2f32_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2f32_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v2f32_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2f32_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2f32_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f32_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to <2 x float>
    %3 = fadd <2 x float> %2, %2
    ret <2 x float> %3
}

define <2 x i32> @test_v2i32_i64(i64 %p) {
; SOFT-LABEL: test_v2i32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_f64(double %p) {
; SOFT-LABEL: test_v2i32_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2i32_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v2i32_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2i32_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2i32_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i32_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to <2 x i32>
    %3 = add <2 x i32> %2, %2
    ret <2 x i32> %3
}

define <4 x i16> @test_v4i16_i64(i64 %p) {
; SOFT-LABEL: test_v4i16_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_f64(double %p) {
; SOFT-LABEL: test_v4i16_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v4i16_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v4i16_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v4i16_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev32.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev32.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v4i16_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev16.8 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i16_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev16.8 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i8> %p, %p
    %2 = bitcast <8 x i8> %1 to <4 x i16>
    %3 = add <4 x i16> %2, %2
    ret <4 x i16> %3
}

define <8 x i8> @test_v8i8_i64(i64 %p) {
; SOFT-LABEL: test_v8i8_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    vmov d16, r1, r0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = add i64 %p, %p
    %2 = bitcast i64 %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_f64(double %p) {
; SOFT-LABEL: test_v8i8_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd double %p, %p
    %2 = bitcast double %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v8i8_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <1 x i64> %p, %p
    %2 = bitcast <1 x i64> %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v8i8_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = fadd <2 x float> %p, %p
    %2 = bitcast <2 x float> %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v8i8_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev32.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev32.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i32> %p, %p
    %2 = bitcast <2 x i32> %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v8i8_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev16.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v8i8_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev16.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i16> %p, %p
    %2 = bitcast <4 x i16> %1 to <8 x i8>
    %3 = add <8 x i8> %2, %2
    ret <8 x i8> %3
}

define fp128 @test_f128_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_f128_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd <2 x double> %p, %p
    %2 = bitcast <2 x double> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define fp128 @test_f128_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_f128_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = add <2 x i64> %p, %p
    %2 = bitcast <2 x i64> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define fp128 @test_f128_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_f128_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd <4 x float> %p, %p
    %2 = bitcast <4 x float> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define fp128 @test_f128_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_f128_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = add <4 x i32> %p, %p
    %2 = bitcast <4 x i32> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define fp128 @test_f128_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_f128_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = add <8 x i16> %p, %p
    %2 = bitcast <8 x i16> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define fp128 @test_f128_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_f128_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vmov r2, r3, d17
; SOFT-NEXT:    vmov r0, r1, d16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_f128_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vmov r2, r3, d17
; HARD-NEXT:    vmov r0, r1, d16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = add <16 x i8> %p, %p
    %2 = bitcast <16 x i8> %1 to fp128
    %3 = fadd fp128 %2, %2
    ret fp128 %3
}

define <2 x double> @test_v2f64_f128(fp128 %p) {
; SOFT-LABEL: test_v2f64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v2f64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd fp128 %p, %p
    %2 = bitcast fp128 %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v2f64_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <2 x i64> %p, %p
    %2 = bitcast <2 x i64> %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2f64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = fadd <4 x float> %p, %p
    %2 = bitcast <4 x float> %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2f64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <4 x i32> %p, %p
    %2 = bitcast <4 x i32> %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2f64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <8 x i16> %p, %p
    %2 = bitcast <8 x i16> %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2f64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2f64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bx lr
    %1 = add <16 x i8> %p, %p
    %2 = bitcast <16 x i8> %1 to <2 x double>
    %3 = fadd <2 x double> %2, %2
    ret <2 x double> %3
}

define <2 x i64> @test_v2i64_f128(fp128 %p) {
; SOFT-LABEL: test_v2i64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v2i64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd fp128 %p, %p
    %2 = bitcast fp128 %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v2i64_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vadd.i64 q8, q9, q9
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
    %1 = fadd <2 x double> %p, %p
    %2 = bitcast <2 x double> %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2i64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
    %1 = fadd <4 x float> %p, %p
    %2 = bitcast <4 x float> %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2i64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
    %1 = add <4 x i32> %p, %p
    %2 = bitcast <4 x i32> %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2i64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
    %1 = add <8 x i16> %p, %p
    %2 = bitcast <8 x i16> %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2i64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v2i64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bx lr
    %1 = add <16 x i8> %p, %p
    %2 = bitcast <16 x i8> %1 to <2 x i64>
    %3 = add <2 x i64> %2, %2
    ret <2 x i64> %3
}

define <4 x float> @test_v4f32_f128(fp128 %p) {
; SOFT-LABEL: test_v4f32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v4f32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd fp128 %p, %p
    %2 = bitcast fp128 %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4f32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
    %1 = fadd <2 x double> %p, %p
    %2 = bitcast <2 x double> %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v4f32_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
    %1 = add <2 x i64> %p, %p
    %2 = bitcast <2 x i64> %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v4f32_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
    %1 = add <4 x i32> %p, %p
    %2 = bitcast <4 x i32> %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v4f32_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev32.16 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev32.16 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
    %1 = add <8 x i16> %p, %p
    %2 = bitcast <8 x i16> %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v4f32_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev32.8 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4f32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev32.8 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bx lr
    %1 = add <16 x i8> %p, %p
    %2 = bitcast <16 x i8> %1 to <4 x float>
    %3 = fadd <4 x float> %2, %2
    ret <4 x float> %3
}

define <4 x i32> @test_v4i32_f128(fp128 %p) {
; SOFT-LABEL: test_v4i32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r11, lr}
; SOFT-NEXT:    push {r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    vmov.32 d17[0], r2
; SOFT-NEXT:    vmov.32 d16[0], r0
; SOFT-NEXT:    vmov.32 d17[1], r3
; SOFT-NEXT:    vmov.32 d16[1], r1
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r11, pc}
;
; HARD-LABEL: test_v4i32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r11, lr}
; HARD-NEXT:    push {r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    vmov.32 d17[0], r2
; HARD-NEXT:    vmov.32 d16[0], r0
; HARD-NEXT:    vmov.32 d17[1], r3
; HARD-NEXT:    vmov.32 d16[1], r1
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r11, pc}
    %1 = fadd fp128 %p, %p
    %2 = bitcast fp128 %1 to <4 x i32>
    %3 = add <4 x i32> %2, %2
    ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4i32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vrev64.32 q8, q9
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bx lr
;
; HARD-LABEL: test_v4i32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
1867; HARD-NEXT:    vrev64.32 q8, q8
1868; HARD-NEXT:    vadd.i32 q8, q8, q8
1869; HARD-NEXT:    vrev64.32 q0, q8
1870; HARD-NEXT:    bx lr
1871    %1 = fadd <2 x double> %p, %p
1872    %2 = bitcast <2 x double> %1 to <4 x i32>
1873    %3 = add <4 x i32> %2, %2
1874    ret <4 x i32> %3
1875}
1876
1877define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
1878; SOFT-LABEL: test_v4i32_v2i64:
1879; SOFT:       @ %bb.0:
1880; SOFT-NEXT:    vmov d17, r3, r2
1881; SOFT-NEXT:    vmov d16, r1, r0
1882; SOFT-NEXT:    vadd.i64 q8, q8, q8
1883; SOFT-NEXT:    vrev64.32 q8, q8
1884; SOFT-NEXT:    vadd.i32 q8, q8, q8
1885; SOFT-NEXT:    vrev64.32 q8, q8
1886; SOFT-NEXT:    vmov r1, r0, d16
1887; SOFT-NEXT:    vmov r3, r2, d17
1888; SOFT-NEXT:    bx lr
1889;
1890; HARD-LABEL: test_v4i32_v2i64:
1891; HARD:       @ %bb.0:
1892; HARD-NEXT:    vadd.i64 q8, q0, q0
1893; HARD-NEXT:    vrev64.32 q8, q8
1894; HARD-NEXT:    vadd.i32 q8, q8, q8
1895; HARD-NEXT:    vrev64.32 q0, q8
1896; HARD-NEXT:    bx lr
1897    %1 = add <2 x i64> %p, %p
1898    %2 = bitcast <2 x i64> %1 to <4 x i32>
1899    %3 = add <4 x i32> %2, %2
1900    ret <4 x i32> %3
1901}
1902
1903define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
1904; SOFT-LABEL: test_v4i32_v4f32:
1905; SOFT:       @ %bb.0:
1906; SOFT-NEXT:    vmov d17, r3, r2
1907; SOFT-NEXT:    vmov d16, r1, r0
1908; SOFT-NEXT:    vrev64.32 q8, q8
1909; SOFT-NEXT:    vadd.f32 q8, q8, q8
1910; SOFT-NEXT:    vadd.i32 q8, q8, q8
1911; SOFT-NEXT:    vrev64.32 q8, q8
1912; SOFT-NEXT:    vmov r1, r0, d16
1913; SOFT-NEXT:    vmov r3, r2, d17
1914; SOFT-NEXT:    bx lr
1915;
1916; HARD-LABEL: test_v4i32_v4f32:
1917; HARD:       @ %bb.0:
1918; HARD-NEXT:    vrev64.32 q8, q0
1919; HARD-NEXT:    vadd.f32 q8, q8, q8
1920; HARD-NEXT:    vadd.i32 q8, q8, q8
1921; HARD-NEXT:    vrev64.32 q0, q8
1922; HARD-NEXT:    bx lr
1923    %1 = fadd <4 x float> %p, %p
1924    %2 = bitcast <4 x float> %1 to <4 x i32>
1925    %3 = add <4 x i32> %2, %2
1926    ret <4 x i32> %3
1927}
1928
1929define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
1930; SOFT-LABEL: test_v4i32_v8i16:
1931; SOFT:       @ %bb.0:
1932; SOFT-NEXT:    vmov d17, r3, r2
1933; SOFT-NEXT:    vmov d16, r1, r0
1934; SOFT-NEXT:    vrev64.16 q8, q8
1935; SOFT-NEXT:    vadd.i16 q8, q8, q8
1936; SOFT-NEXT:    vrev32.16 q8, q8
1937; SOFT-NEXT:    vadd.i32 q8, q8, q8
1938; SOFT-NEXT:    vrev64.32 q8, q8
1939; SOFT-NEXT:    vmov r1, r0, d16
1940; SOFT-NEXT:    vmov r3, r2, d17
1941; SOFT-NEXT:    bx lr
1942;
1943; HARD-LABEL: test_v4i32_v8i16:
1944; HARD:       @ %bb.0:
1945; HARD-NEXT:    vrev64.16 q8, q0
1946; HARD-NEXT:    vadd.i16 q8, q8, q8
1947; HARD-NEXT:    vrev32.16 q8, q8
1948; HARD-NEXT:    vadd.i32 q8, q8, q8
1949; HARD-NEXT:    vrev64.32 q0, q8
1950; HARD-NEXT:    bx lr
1951    %1 = add <8 x i16> %p, %p
1952    %2 = bitcast <8 x i16> %1 to <4 x i32>
1953    %3 = add <4 x i32> %2, %2
1954    ret <4 x i32> %3
1955}
1956
1957define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
1958; SOFT-LABEL: test_v4i32_v16i8:
1959; SOFT:       @ %bb.0:
1960; SOFT-NEXT:    vmov d17, r3, r2
1961; SOFT-NEXT:    vmov d16, r1, r0
1962; SOFT-NEXT:    vrev64.8 q8, q8
1963; SOFT-NEXT:    vadd.i8 q8, q8, q8
1964; SOFT-NEXT:    vrev32.8 q8, q8
1965; SOFT-NEXT:    vadd.i32 q8, q8, q8
1966; SOFT-NEXT:    vrev64.32 q8, q8
1967; SOFT-NEXT:    vmov r1, r0, d16
1968; SOFT-NEXT:    vmov r3, r2, d17
1969; SOFT-NEXT:    bx lr
1970;
1971; HARD-LABEL: test_v4i32_v16i8:
1972; HARD:       @ %bb.0:
1973; HARD-NEXT:    vrev64.8 q8, q0
1974; HARD-NEXT:    vadd.i8 q8, q8, q8
1975; HARD-NEXT:    vrev32.8 q8, q8
1976; HARD-NEXT:    vadd.i32 q8, q8, q8
1977; HARD-NEXT:    vrev64.32 q0, q8
1978; HARD-NEXT:    bx lr
1979    %1 = add <16 x i8> %p, %p
1980    %2 = bitcast <16 x i8> %1 to <4 x i32>
1981    %3 = add <4 x i32> %2, %2
1982    ret <4 x i32> %3
1983}
1984
1985define <8 x i16> @test_v8i16_f128(fp128 %p) {
1986; SOFT-LABEL: test_v8i16_f128:
1987; SOFT:       @ %bb.0:
1988; SOFT-NEXT:    .save {r11, lr}
1989; SOFT-NEXT:    push {r11, lr}
1990; SOFT-NEXT:    .pad #16
1991; SOFT-NEXT:    sub sp, sp, #16
1992; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
1993; SOFT-NEXT:    bl __addtf3
1994; SOFT-NEXT:    vmov.32 d17[0], r2
1995; SOFT-NEXT:    vmov.32 d16[0], r0
1996; SOFT-NEXT:    vmov.32 d17[1], r3
1997; SOFT-NEXT:    vmov.32 d16[1], r1
1998; SOFT-NEXT:    vrev32.16 q8, q8
1999; SOFT-NEXT:    vadd.i16 q8, q8, q8
2000; SOFT-NEXT:    vrev64.16 q8, q8
2001; SOFT-NEXT:    vmov r1, r0, d16
2002; SOFT-NEXT:    vmov r3, r2, d17
2003; SOFT-NEXT:    add sp, sp, #16
2004; SOFT-NEXT:    pop {r11, pc}
2005;
2006; HARD-LABEL: test_v8i16_f128:
2007; HARD:       @ %bb.0:
2008; HARD-NEXT:    .save {r11, lr}
2009; HARD-NEXT:    push {r11, lr}
2010; HARD-NEXT:    .pad #16
2011; HARD-NEXT:    sub sp, sp, #16
2012; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
2013; HARD-NEXT:    bl __addtf3
2014; HARD-NEXT:    vmov.32 d17[0], r2
2015; HARD-NEXT:    vmov.32 d16[0], r0
2016; HARD-NEXT:    vmov.32 d17[1], r3
2017; HARD-NEXT:    vmov.32 d16[1], r1
2018; HARD-NEXT:    vrev32.16 q8, q8
2019; HARD-NEXT:    vadd.i16 q8, q8, q8
2020; HARD-NEXT:    vrev64.16 q0, q8
2021; HARD-NEXT:    add sp, sp, #16
2022; HARD-NEXT:    pop {r11, pc}
2023    %1 = fadd fp128 %p, %p
2024    %2 = bitcast fp128 %1 to <8 x i16>
2025    %3 = add <8 x i16> %2, %2
2026    ret <8 x i16> %3
2027}
2028
2029define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
2030; SOFT-LABEL: test_v8i16_v2f64:
2031; SOFT:       @ %bb.0:
2032; SOFT-NEXT:    vmov d16, r3, r2
2033; SOFT-NEXT:    vmov d17, r1, r0
2034; SOFT-NEXT:    vadd.f64 d19, d16, d16
2035; SOFT-NEXT:    vadd.f64 d18, d17, d17
2036; SOFT-NEXT:    vrev64.16 q8, q9
2037; SOFT-NEXT:    vadd.i16 q8, q8, q8
2038; SOFT-NEXT:    vrev64.16 q8, q8
2039; SOFT-NEXT:    vmov r1, r0, d16
2040; SOFT-NEXT:    vmov r3, r2, d17
2041; SOFT-NEXT:    bx lr
2042;
2043; HARD-LABEL: test_v8i16_v2f64:
2044; HARD:       @ %bb.0:
2045; HARD-NEXT:    vadd.f64 d17, d1, d1
2046; HARD-NEXT:    vadd.f64 d16, d0, d0
2047; HARD-NEXT:    vrev64.16 q8, q8
2048; HARD-NEXT:    vadd.i16 q8, q8, q8
2049; HARD-NEXT:    vrev64.16 q0, q8
2050; HARD-NEXT:    bx lr
2051    %1 = fadd <2 x double> %p, %p
2052    %2 = bitcast <2 x double> %1 to <8 x i16>
2053    %3 = add <8 x i16> %2, %2
2054    ret <8 x i16> %3
2055}
2056
2057define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
2058; SOFT-LABEL: test_v8i16_v2i64:
2059; SOFT:       @ %bb.0:
2060; SOFT-NEXT:    vmov d17, r3, r2
2061; SOFT-NEXT:    vmov d16, r1, r0
2062; SOFT-NEXT:    vadd.i64 q8, q8, q8
2063; SOFT-NEXT:    vrev64.16 q8, q8
2064; SOFT-NEXT:    vadd.i16 q8, q8, q8
2065; SOFT-NEXT:    vrev64.16 q8, q8
2066; SOFT-NEXT:    vmov r1, r0, d16
2067; SOFT-NEXT:    vmov r3, r2, d17
2068; SOFT-NEXT:    bx lr
2069;
2070; HARD-LABEL: test_v8i16_v2i64:
2071; HARD:       @ %bb.0:
2072; HARD-NEXT:    vadd.i64 q8, q0, q0
2073; HARD-NEXT:    vrev64.16 q8, q8
2074; HARD-NEXT:    vadd.i16 q8, q8, q8
2075; HARD-NEXT:    vrev64.16 q0, q8
2076; HARD-NEXT:    bx lr
2077    %1 = add <2 x i64> %p, %p
2078    %2 = bitcast <2 x i64> %1 to <8 x i16>
2079    %3 = add <8 x i16> %2, %2
2080    ret <8 x i16> %3
2081}
2082
2083define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
2084; SOFT-LABEL: test_v8i16_v4f32:
2085; SOFT:       @ %bb.0:
2086; SOFT-NEXT:    vmov d17, r3, r2
2087; SOFT-NEXT:    vmov d16, r1, r0
2088; SOFT-NEXT:    vrev64.32 q8, q8
2089; SOFT-NEXT:    vadd.f32 q8, q8, q8
2090; SOFT-NEXT:    vrev32.16 q8, q8
2091; SOFT-NEXT:    vadd.i16 q8, q8, q8
2092; SOFT-NEXT:    vrev64.16 q8, q8
2093; SOFT-NEXT:    vmov r1, r0, d16
2094; SOFT-NEXT:    vmov r3, r2, d17
2095; SOFT-NEXT:    bx lr
2096;
2097; HARD-LABEL: test_v8i16_v4f32:
2098; HARD:       @ %bb.0:
2099; HARD-NEXT:    vrev64.32 q8, q0
2100; HARD-NEXT:    vadd.f32 q8, q8, q8
2101; HARD-NEXT:    vrev32.16 q8, q8
2102; HARD-NEXT:    vadd.i16 q8, q8, q8
2103; HARD-NEXT:    vrev64.16 q0, q8
2104; HARD-NEXT:    bx lr
2105    %1 = fadd <4 x float> %p, %p
2106    %2 = bitcast <4 x float> %1 to <8 x i16>
2107    %3 = add <8 x i16> %2, %2
2108    ret <8 x i16> %3
2109}
2110
2111define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
2112; SOFT-LABEL: test_v8i16_v4i32:
2113; SOFT:       @ %bb.0:
2114; SOFT-NEXT:    vmov d17, r3, r2
2115; SOFT-NEXT:    vmov d16, r1, r0
2116; SOFT-NEXT:    vrev64.32 q8, q8
2117; SOFT-NEXT:    vadd.i32 q8, q8, q8
2118; SOFT-NEXT:    vrev32.16 q8, q8
2119; SOFT-NEXT:    vadd.i16 q8, q8, q8
2120; SOFT-NEXT:    vrev64.16 q8, q8
2121; SOFT-NEXT:    vmov r1, r0, d16
2122; SOFT-NEXT:    vmov r3, r2, d17
2123; SOFT-NEXT:    bx lr
2124;
2125; HARD-LABEL: test_v8i16_v4i32:
2126; HARD:       @ %bb.0:
2127; HARD-NEXT:    vrev64.32 q8, q0
2128; HARD-NEXT:    vadd.i32 q8, q8, q8
2129; HARD-NEXT:    vrev32.16 q8, q8
2130; HARD-NEXT:    vadd.i16 q8, q8, q8
2131; HARD-NEXT:    vrev64.16 q0, q8
2132; HARD-NEXT:    bx lr
2133    %1 = add <4 x i32> %p, %p
2134    %2 = bitcast <4 x i32> %1 to <8 x i16>
2135    %3 = add <8 x i16> %2, %2
2136    ret <8 x i16> %3
2137}
2138
2139define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
2140; SOFT-LABEL: test_v8i16_v16i8:
2141; SOFT:       @ %bb.0:
2142; SOFT-NEXT:    vmov d17, r3, r2
2143; SOFT-NEXT:    vmov d16, r1, r0
2144; SOFT-NEXT:    vrev64.8 q8, q8
2145; SOFT-NEXT:    vadd.i8 q8, q8, q8
2146; SOFT-NEXT:    vrev16.8 q8, q8
2147; SOFT-NEXT:    vadd.i16 q8, q8, q8
2148; SOFT-NEXT:    vrev64.16 q8, q8
2149; SOFT-NEXT:    vmov r1, r0, d16
2150; SOFT-NEXT:    vmov r3, r2, d17
2151; SOFT-NEXT:    bx lr
2152;
2153; HARD-LABEL: test_v8i16_v16i8:
2154; HARD:       @ %bb.0:
2155; HARD-NEXT:    vrev64.8 q8, q0
2156; HARD-NEXT:    vadd.i8 q8, q8, q8
2157; HARD-NEXT:    vrev16.8 q8, q8
2158; HARD-NEXT:    vadd.i16 q8, q8, q8
2159; HARD-NEXT:    vrev64.16 q0, q8
2160; HARD-NEXT:    bx lr
2161    %1 = add <16 x i8> %p, %p
2162    %2 = bitcast <16 x i8> %1 to <8 x i16>
2163    %3 = add <8 x i16> %2, %2
2164    ret <8 x i16> %3
2165}
2166
2167define <16 x i8> @test_v16i8_f128(fp128 %p) {
2168; SOFT-LABEL: test_v16i8_f128:
2169; SOFT:       @ %bb.0:
2170; SOFT-NEXT:    .save {r11, lr}
2171; SOFT-NEXT:    push {r11, lr}
2172; SOFT-NEXT:    .pad #16
2173; SOFT-NEXT:    sub sp, sp, #16
2174; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
2175; SOFT-NEXT:    bl __addtf3
2176; SOFT-NEXT:    vmov.32 d17[0], r2
2177; SOFT-NEXT:    vmov.32 d16[0], r0
2178; SOFT-NEXT:    vmov.32 d17[1], r3
2179; SOFT-NEXT:    vmov.32 d16[1], r1
2180; SOFT-NEXT:    vrev32.8 q8, q8
2181; SOFT-NEXT:    vadd.i8 q8, q8, q8
2182; SOFT-NEXT:    vrev64.8 q8, q8
2183; SOFT-NEXT:    vmov r1, r0, d16
2184; SOFT-NEXT:    vmov r3, r2, d17
2185; SOFT-NEXT:    add sp, sp, #16
2186; SOFT-NEXT:    pop {r11, pc}
2187;
2188; HARD-LABEL: test_v16i8_f128:
2189; HARD:       @ %bb.0:
2190; HARD-NEXT:    .save {r11, lr}
2191; HARD-NEXT:    push {r11, lr}
2192; HARD-NEXT:    .pad #16
2193; HARD-NEXT:    sub sp, sp, #16
2194; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
2195; HARD-NEXT:    bl __addtf3
2196; HARD-NEXT:    vmov.32 d17[0], r2
2197; HARD-NEXT:    vmov.32 d16[0], r0
2198; HARD-NEXT:    vmov.32 d17[1], r3
2199; HARD-NEXT:    vmov.32 d16[1], r1
2200; HARD-NEXT:    vrev32.8 q8, q8
2201; HARD-NEXT:    vadd.i8 q8, q8, q8
2202; HARD-NEXT:    vrev64.8 q0, q8
2203; HARD-NEXT:    add sp, sp, #16
2204; HARD-NEXT:    pop {r11, pc}
2205    %1 = fadd fp128 %p, %p
2206    %2 = bitcast fp128 %1 to <16 x i8>
2207    %3 = add <16 x i8> %2, %2
2208    ret <16 x i8> %3
2209}
2210
2211define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
2212; SOFT-LABEL: test_v16i8_v2f64:
2213; SOFT:       @ %bb.0:
2214; SOFT-NEXT:    vmov d16, r3, r2
2215; SOFT-NEXT:    vmov d17, r1, r0
2216; SOFT-NEXT:    vadd.f64 d19, d16, d16
2217; SOFT-NEXT:    vadd.f64 d18, d17, d17
2218; SOFT-NEXT:    vrev64.8 q8, q9
2219; SOFT-NEXT:    vadd.i8 q8, q8, q8
2220; SOFT-NEXT:    vrev64.8 q8, q8
2221; SOFT-NEXT:    vmov r1, r0, d16
2222; SOFT-NEXT:    vmov r3, r2, d17
2223; SOFT-NEXT:    bx lr
2224;
2225; HARD-LABEL: test_v16i8_v2f64:
2226; HARD:       @ %bb.0:
2227; HARD-NEXT:    vadd.f64 d17, d1, d1
2228; HARD-NEXT:    vadd.f64 d16, d0, d0
2229; HARD-NEXT:    vrev64.8 q8, q8
2230; HARD-NEXT:    vadd.i8 q8, q8, q8
2231; HARD-NEXT:    vrev64.8 q0, q8
2232; HARD-NEXT:    bx lr
2233    %1 = fadd <2 x double> %p, %p
2234    %2 = bitcast <2 x double> %1 to <16 x i8>
2235    %3 = add <16 x i8> %2, %2
2236    ret <16 x i8> %3
2237}
2238
2239define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
2240; SOFT-LABEL: test_v16i8_v2i64:
2241; SOFT:       @ %bb.0:
2242; SOFT-NEXT:    vmov d17, r3, r2
2243; SOFT-NEXT:    vmov d16, r1, r0
2244; SOFT-NEXT:    vadd.i64 q8, q8, q8
2245; SOFT-NEXT:    vrev64.8 q8, q8
2246; SOFT-NEXT:    vadd.i8 q8, q8, q8
2247; SOFT-NEXT:    vrev64.8 q8, q8
2248; SOFT-NEXT:    vmov r1, r0, d16
2249; SOFT-NEXT:    vmov r3, r2, d17
2250; SOFT-NEXT:    bx lr
2251;
2252; HARD-LABEL: test_v16i8_v2i64:
2253; HARD:       @ %bb.0:
2254; HARD-NEXT:    vadd.i64 q8, q0, q0
2255; HARD-NEXT:    vrev64.8 q8, q8
2256; HARD-NEXT:    vadd.i8 q8, q8, q8
2257; HARD-NEXT:    vrev64.8 q0, q8
2258; HARD-NEXT:    bx lr
2259    %1 = add <2 x i64> %p, %p
2260    %2 = bitcast <2 x i64> %1 to <16 x i8>
2261    %3 = add <16 x i8> %2, %2
2262    ret <16 x i8> %3
2263}
2264
2265define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
2266; SOFT-LABEL: test_v16i8_v4f32:
2267; SOFT:       @ %bb.0:
2268; SOFT-NEXT:    vmov d17, r3, r2
2269; SOFT-NEXT:    vmov d16, r1, r0
2270; SOFT-NEXT:    vrev64.32 q8, q8
2271; SOFT-NEXT:    vadd.f32 q8, q8, q8
2272; SOFT-NEXT:    vrev32.8 q8, q8
2273; SOFT-NEXT:    vadd.i8 q8, q8, q8
2274; SOFT-NEXT:    vrev64.8 q8, q8
2275; SOFT-NEXT:    vmov r1, r0, d16
2276; SOFT-NEXT:    vmov r3, r2, d17
2277; SOFT-NEXT:    bx lr
2278;
2279; HARD-LABEL: test_v16i8_v4f32:
2280; HARD:       @ %bb.0:
2281; HARD-NEXT:    vrev64.32 q8, q0
2282; HARD-NEXT:    vadd.f32 q8, q8, q8
2283; HARD-NEXT:    vrev32.8 q8, q8
2284; HARD-NEXT:    vadd.i8 q8, q8, q8
2285; HARD-NEXT:    vrev64.8 q0, q8
2286; HARD-NEXT:    bx lr
2287    %1 = fadd <4 x float> %p, %p
2288    %2 = bitcast <4 x float> %1 to <16 x i8>
2289    %3 = add <16 x i8> %2, %2
2290    ret <16 x i8> %3
2291}
2292
2293define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
2294; SOFT-LABEL: test_v16i8_v4i32:
2295; SOFT:       @ %bb.0:
2296; SOFT-NEXT:    vmov d17, r3, r2
2297; SOFT-NEXT:    vmov d16, r1, r0
2298; SOFT-NEXT:    vrev64.32 q8, q8
2299; SOFT-NEXT:    vadd.i32 q8, q8, q8
2300; SOFT-NEXT:    vrev32.8 q8, q8
2301; SOFT-NEXT:    vadd.i8 q8, q8, q8
2302; SOFT-NEXT:    vrev64.8 q8, q8
2303; SOFT-NEXT:    vmov r1, r0, d16
2304; SOFT-NEXT:    vmov r3, r2, d17
2305; SOFT-NEXT:    bx lr
2306;
2307; HARD-LABEL: test_v16i8_v4i32:
2308; HARD:       @ %bb.0:
2309; HARD-NEXT:    vrev64.32 q8, q0
2310; HARD-NEXT:    vadd.i32 q8, q8, q8
2311; HARD-NEXT:    vrev32.8 q8, q8
2312; HARD-NEXT:    vadd.i8 q8, q8, q8
2313; HARD-NEXT:    vrev64.8 q0, q8
2314; HARD-NEXT:    bx lr
2315    %1 = add <4 x i32> %p, %p
2316    %2 = bitcast <4 x i32> %1 to <16 x i8>
2317    %3 = add <16 x i8> %2, %2
2318    ret <16 x i8> %3
2319}
2320
2321define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
2322; SOFT-LABEL: test_v16i8_v8i16:
2323; SOFT:       @ %bb.0:
2324; SOFT-NEXT:    vmov d17, r3, r2
2325; SOFT-NEXT:    vmov d16, r1, r0
2326; SOFT-NEXT:    vrev64.16 q8, q8
2327; SOFT-NEXT:    vadd.i16 q8, q8, q8
2328; SOFT-NEXT:    vrev16.8 q8, q8
2329; SOFT-NEXT:    vadd.i8 q8, q8, q8
2330; SOFT-NEXT:    vrev64.8 q8, q8
2331; SOFT-NEXT:    vmov r1, r0, d16
2332; SOFT-NEXT:    vmov r3, r2, d17
2333; SOFT-NEXT:    bx lr
2334;
2335; HARD-LABEL: test_v16i8_v8i16:
2336; HARD:       @ %bb.0:
2337; HARD-NEXT:    vrev64.16 q8, q0
2338; HARD-NEXT:    vadd.i16 q8, q8, q8
2339; HARD-NEXT:    vrev16.8 q8, q8
2340; HARD-NEXT:    vadd.i8 q8, q8, q8
2341; HARD-NEXT:    vrev64.8 q0, q8
2342; HARD-NEXT:    bx lr
2343    %1 = add <8 x i16> %p, %p
2344    %2 = bitcast <8 x i16> %1 to <16 x i8>
2345    %3 = add <16 x i8> %2, %2
2346    ret <16 x i8> %3
2347}
2348
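; A reading aid for the checks above (descriptive only; the assertions are
; autogenerated): NEON register lanes are numbered little-endian regardless
; of target endianness, so on armeb a bitcast between vector types with
; different element sizes cannot be a no-op. It is instead lowered to a
; VREV whose granularity depends on the two element sizes involved
; (vrev64.32, vrev32.16, vrev16.8, etc.), which is why every function here
; checks for a VREV before and after the arithmetic on the bitcast value;
; see llvm/docs/BigEndianNEON.rst for the underlying lowering strategy.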