xref: /llvm-project/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD

; i64 return, double arg: soft-float moves the double into r1:r0 (big-endian vmov),
; hard-float passes it in d0; the i64 result comes back in r0:r1 either way.
declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_f64_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_i64_f64_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load double, ptr %p
    %2 = fadd double %1, %1
    %3 = call i64 @test_i64_f64_helper(double %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
40
; i64 return, <1 x i64> arg: no lane swap needed for a single 64-bit lane;
; soft-float marshals the vector through r1:r0, hard-float through d0.
declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
define void @test_i64_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v1i64_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_i64_v1i64_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <1 x i64>, ptr %p
    %2 = add <1 x i64> %1, %1
    %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
76
; i64 return, <2 x float> arg: big-endian lane order is fixed up with vrev64.32
; around the vector op before the value is passed (r1:r0 soft, d0 hard).
declare i64 @test_i64_v2f32_helper(<2 x float> %p)
define void @test_i64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v2f32_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_i64_v2f32_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x float>, ptr %p
    %2 = fadd <2 x float> %1, %1
    %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
116
; i64 return, <2 x i32> arg: vrev64.32 swaps the 32-bit lanes for big-endian
; memory/register order before and after the vadd.i32.
declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
define void @test_i64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v2i32_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_i64_v2i32_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i32>, ptr %p
    %2 = add <2 x i32> %1, %1
    %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
156
; i64 return, <4 x i16> arg: vrev64.16 handles the big-endian 16-bit lane swap.
declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
define void @test_i64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v4i16_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_i64_v4i16_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i16>, ptr %p
    %2 = add <4 x i16> %1, %1
    %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
196
; i64 return, <8 x i8> arg: vrev64.8 handles the big-endian 8-bit lane swap.
declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
define void @test_i64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v8i8_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_i64_v8i8_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i8>, ptr %p
    %2 = add <8 x i8> %1, %1
    %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
    %4 = add i64 %3, %3
    store i64 %4, ptr %q
    ret void
}
236
; double return, i64 arg: the i64 goes out in r0:r1; the double comes back in
; r1:r0 (soft, reassembled via vmov) or d0 (hard).
declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_f64_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_f64_i64_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load i64, ptr %p
    %2 = add i64 %1, %1
    %3 = call double @test_f64_i64_helper(i64 %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
272
; double return, <1 x i64> arg: single 64-bit lane, so no vrev is required on
; either side of the call.
declare double @test_f64_v1i64_helper(<1 x i64> %p)
define void @test_f64_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_f64_v1i64_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <1 x i64>, ptr %p
    %2 = add <1 x i64> %1, %1
    %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
307
; double return, <2 x float> arg: vrev64.32 lane swap on the outgoing vector;
; the scalar double result needs no swap.
declare double @test_f64_v2f32_helper(<2 x float> %p)
define void @test_f64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_f64_v2f32_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x float>, ptr %p
    %2 = fadd <2 x float> %1, %1
    %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
346
; double return, <2 x i32> arg: vrev64.32 lane swap on the outgoing vector only.
declare double @test_f64_v2i32_helper(<2 x i32> %p)
define void @test_f64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_f64_v2i32_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i32>, ptr %p
    %2 = add <2 x i32> %1, %1
    %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
385
; double return, <4 x i16> arg: vrev64.16 lane swap on the outgoing vector only.
declare double @test_f64_v4i16_helper(<4 x i16> %p)
define void @test_f64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_f64_v4i16_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i16>, ptr %p
    %2 = add <4 x i16> %1, %1
    %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
424
; double return, <8 x i8> arg: vrev64.8 lane swap on the outgoing vector only.
declare double @test_f64_v8i8_helper(<8 x i8> %p)
define void @test_f64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_f64_v8i8_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i8>, ptr %p
    %2 = add <8 x i8> %1, %1
    %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
    %4 = fadd double %3, %3
    store double %4, ptr %q
    ret void
}
463
; <1 x i64> return, i64 arg: result comes back in r1:r0 (soft) or d0 (hard);
; single 64-bit lane needs no vrev.
declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v1i64_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v1i64_i64_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load i64, ptr %p
    %2 = add i64 %1, %1
    %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
499
; <1 x i64> return, double arg: no vrev needed; checks the GPR-pair (soft) vs
; d0 (hard) marshalling in both directions.
declare <1 x i64> @test_v1i64_f64_helper(double %p)
define void @test_v1i64_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v1i64_f64_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load double, ptr %p
    %2 = fadd double %1, %1
    %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
534
; <1 x i64> return, <2 x float> arg: vrev64.32 swap on the outgoing vector;
; the 64-bit-lane result needs no swap.
declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
define void @test_v1i64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v1i64_v2f32_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x float>, ptr %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
573
; <1 x i64> return, <2 x i32> arg: vrev64.32 swap on the outgoing vector only.
declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
define void @test_v1i64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v1i64_v2i32_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i32>, ptr %p
    %2 = add <2 x i32> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
612
; <1 x i64> return, <4 x i16> arg: vrev64.16 swap on the outgoing vector only.
declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
define void @test_v1i64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v1i64_v4i16_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i16>, ptr %p
    %2 = add <4 x i16> %1, %1
    %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
651
; <1 x i64> return, <8 x i8> arg: vrev64.8 swap on the outgoing vector only.
declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
define void @test_v1i64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v1i64_v8i8_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i8>, ptr %p
    %2 = add <8 x i8> %1, %1
    %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, ptr %q
    ret void
}
690
; <2 x float> return, i64 arg: the returned vector is vrev64.32-swapped into
; big-endian lane order before use and swapped back before the store.
declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v2f32_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v2f32_i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load i64, ptr %p
    %2 = add i64 %1, %1
    %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
730
; <2 x float> return, double arg: vrev64.32 swap applied only to the returned
; vector; the outgoing scalar double needs none.
declare <2 x float> @test_v2f32_f64_helper(double %p)
define void @test_v2f32_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v2f32_f64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load double, ptr %p
    %2 = fadd double %1, %1
    %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
769
; <2 x float> return, <1 x i64> arg: only the returned vector needs the
; vrev64.32 big-endian lane swap.
declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
define void @test_v2f32_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_v2f32_v1i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <1 x i64>, ptr %p
    %2 = add <1 x i64> %1, %1
    %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
808
; <2 x float> return, <2 x i32> arg: vrev64.32 swaps both the outgoing and the
; returned vector for big-endian lane order.
declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
define void @test_v2f32_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v2f32_v2i32_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i32>, ptr %p
    %2 = add <2 x i32> %1, %1
    %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
851
; <2 x float> return, <4 x i16> arg: vrev64.16 on the outgoing vector,
; vrev64.32 on the returned one.
declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
define void @test_v2f32_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v2f32_v4i16_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i16>, ptr %p
    %2 = add <4 x i16> %1, %1
    %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
894
; <2 x float> return, <8 x i8> arg: vrev64.8 on the outgoing vector,
; vrev64.32 on the returned one.
declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
define void @test_v2f32_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v2f32_v8i8_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i8>, ptr %p
    %2 = add <8 x i8> %1, %1
    %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, ptr %q
    ret void
}
937
; <2 x i32> return, i64 arg: the returned vector is vrev64.32-swapped into
; big-endian lane order before use and swapped back before the store.
declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v2i32_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v2i32_i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load i64, ptr %p
    %2 = add i64 %1, %1
    %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, ptr %q
    ret void
}
977
978declare <2 x i32> @test_v2i32_f64_helper(double %p)
979define void @test_v2i32_f64(ptr %p, ptr %q) {
980; SOFT-LABEL: test_v2i32_f64:
981; SOFT:       @ %bb.0:
982; SOFT-NEXT:    .save {r4, lr}
983; SOFT-NEXT:    push {r4, lr}
984; SOFT-NEXT:    vldr d16, [r0]
985; SOFT-NEXT:    mov r4, r1
986; SOFT-NEXT:    vadd.f64 d16, d16, d16
987; SOFT-NEXT:    vmov r1, r0, d16
988; SOFT-NEXT:    bl test_v2i32_f64_helper
989; SOFT-NEXT:    vmov d16, r1, r0
990; SOFT-NEXT:    vrev64.32 d16, d16
991; SOFT-NEXT:    vadd.i32 d16, d16, d16
992; SOFT-NEXT:    vrev64.32 d16, d16
993; SOFT-NEXT:    vstr d16, [r4]
994; SOFT-NEXT:    pop {r4, pc}
995;
996; HARD-LABEL: test_v2i32_f64:
997; HARD:       @ %bb.0:
998; HARD-NEXT:    .save {r4, lr}
999; HARD-NEXT:    push {r4, lr}
1000; HARD-NEXT:    vldr d16, [r0]
1001; HARD-NEXT:    mov r4, r1
1002; HARD-NEXT:    vadd.f64 d0, d16, d16
1003; HARD-NEXT:    bl test_v2i32_f64_helper
1004; HARD-NEXT:    vrev64.32 d16, d0
1005; HARD-NEXT:    vadd.i32 d16, d16, d16
1006; HARD-NEXT:    vrev64.32 d16, d16
1007; HARD-NEXT:    vstr d16, [r4]
1008; HARD-NEXT:    pop {r4, pc}
; Pass double to the helper, receive <2 x i32>: f64 needs no lane swap
; (SOFT splits it r1/r0 via vmov, HARD passes d0); only the returned vector
; gets vrev64.32 fixups around its add.
1009    %1 = load double, ptr %p
1010    %2 = fadd double %1, %1
1011    %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
1012    %4 = add <2 x i32> %3, %3
1013    store <2 x i32> %4, ptr %q
1014    ret void
1015}
1016
1017declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
1018define void @test_v2i32_v1i64(ptr %p, ptr %q) {
1019; SOFT-LABEL: test_v2i32_v1i64:
1020; SOFT:       @ %bb.0:
1021; SOFT-NEXT:    .save {r4, lr}
1022; SOFT-NEXT:    push {r4, lr}
1023; SOFT-NEXT:    vldr d16, [r0]
1024; SOFT-NEXT:    mov r4, r1
1025; SOFT-NEXT:    vadd.i64 d16, d16, d16
1026; SOFT-NEXT:    vmov r1, r0, d16
1027; SOFT-NEXT:    bl test_v2i32_v1i64_helper
1028; SOFT-NEXT:    vmov d16, r1, r0
1029; SOFT-NEXT:    vrev64.32 d16, d16
1030; SOFT-NEXT:    vadd.i32 d16, d16, d16
1031; SOFT-NEXT:    vrev64.32 d16, d16
1032; SOFT-NEXT:    vstr d16, [r4]
1033; SOFT-NEXT:    pop {r4, pc}
1034;
1035; HARD-LABEL: test_v2i32_v1i64:
1036; HARD:       @ %bb.0:
1037; HARD-NEXT:    .save {r4, lr}
1038; HARD-NEXT:    push {r4, lr}
1039; HARD-NEXT:    vldr d16, [r0]
1040; HARD-NEXT:    mov r4, r1
1041; HARD-NEXT:    vadd.i64 d0, d16, d16
1042; HARD-NEXT:    bl test_v2i32_v1i64_helper
1043; HARD-NEXT:    vrev64.32 d16, d0
1044; HARD-NEXT:    vadd.i32 d16, d16, d16
1045; HARD-NEXT:    vrev64.32 d16, d16
1046; HARD-NEXT:    vstr d16, [r4]
1047; HARD-NEXT:    pop {r4, pc}
; Pass <1 x i64> to the helper, receive <2 x i32>: a single-element i64 vector
; needs no vrev; only the returned <2 x i32> gets vrev64.32 fixups.
1048    %1 = load <1 x i64>, ptr %p
1049    %2 = add <1 x i64> %1, %1
1050    %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
1051    %4 = add <2 x i32> %3, %3
1052    store <2 x i32> %4, ptr %q
1053    ret void
1054}
1055
1056declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
1057define void @test_v2i32_v2f32(ptr %p, ptr %q) {
1058; SOFT-LABEL: test_v2i32_v2f32:
1059; SOFT:       @ %bb.0:
1060; SOFT-NEXT:    .save {r4, lr}
1061; SOFT-NEXT:    push {r4, lr}
1062; SOFT-NEXT:    vldr d16, [r0]
1063; SOFT-NEXT:    mov r4, r1
1064; SOFT-NEXT:    vrev64.32 d16, d16
1065; SOFT-NEXT:    vadd.f32 d16, d16, d16
1066; SOFT-NEXT:    vrev64.32 d16, d16
1067; SOFT-NEXT:    vmov r1, r0, d16
1068; SOFT-NEXT:    bl test_v2i32_v2f32_helper
1069; SOFT-NEXT:    vmov d16, r1, r0
1070; SOFT-NEXT:    vrev64.32 d16, d16
1071; SOFT-NEXT:    vadd.i32 d16, d16, d16
1072; SOFT-NEXT:    vrev64.32 d16, d16
1073; SOFT-NEXT:    vstr d16, [r4]
1074; SOFT-NEXT:    pop {r4, pc}
1075;
1076; HARD-LABEL: test_v2i32_v2f32:
1077; HARD:       @ %bb.0:
1078; HARD-NEXT:    .save {r4, lr}
1079; HARD-NEXT:    push {r4, lr}
1080; HARD-NEXT:    vldr d16, [r0]
1081; HARD-NEXT:    mov r4, r1
1082; HARD-NEXT:    vrev64.32 d16, d16
1083; HARD-NEXT:    vadd.f32 d16, d16, d16
1084; HARD-NEXT:    vrev64.32 d0, d16
1085; HARD-NEXT:    bl test_v2i32_v2f32_helper
1086; HARD-NEXT:    vrev64.32 d16, d0
1087; HARD-NEXT:    vadd.i32 d16, d16, d16
1088; HARD-NEXT:    vrev64.32 d16, d16
1089; HARD-NEXT:    vstr d16, [r4]
1090; HARD-NEXT:    pop {r4, pc}
; Pass <2 x float> to the helper, receive <2 x i32>: both sides are 32-bit
; lanes, so vrev64.32 fixups wrap the f32 add before the call and the i32 add
; after it.
1091    %1 = load <2 x float>, ptr %p
1092    %2 = fadd <2 x float> %1, %1
1093    %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
1094    %4 = add <2 x i32> %3, %3
1095    store <2 x i32> %4, ptr %q
1096    ret void
1097}
1098
1099declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
1100define void @test_v2i32_v4i16(ptr %p, ptr %q) {
1101; SOFT-LABEL: test_v2i32_v4i16:
1102; SOFT:       @ %bb.0:
1103; SOFT-NEXT:    .save {r4, lr}
1104; SOFT-NEXT:    push {r4, lr}
1105; SOFT-NEXT:    vldr d16, [r0]
1106; SOFT-NEXT:    mov r4, r1
1107; SOFT-NEXT:    vrev64.16 d16, d16
1108; SOFT-NEXT:    vadd.i16 d16, d16, d16
1109; SOFT-NEXT:    vrev64.16 d16, d16
1110; SOFT-NEXT:    vmov r1, r0, d16
1111; SOFT-NEXT:    bl test_v2i32_v4i16_helper
1112; SOFT-NEXT:    vmov d16, r1, r0
1113; SOFT-NEXT:    vrev64.32 d16, d16
1114; SOFT-NEXT:    vadd.i32 d16, d16, d16
1115; SOFT-NEXT:    vrev64.32 d16, d16
1116; SOFT-NEXT:    vstr d16, [r4]
1117; SOFT-NEXT:    pop {r4, pc}
1118;
1119; HARD-LABEL: test_v2i32_v4i16:
1120; HARD:       @ %bb.0:
1121; HARD-NEXT:    .save {r4, lr}
1122; HARD-NEXT:    push {r4, lr}
1123; HARD-NEXT:    vldr d16, [r0]
1124; HARD-NEXT:    mov r4, r1
1125; HARD-NEXT:    vrev64.16 d16, d16
1126; HARD-NEXT:    vadd.i16 d16, d16, d16
1127; HARD-NEXT:    vrev64.16 d0, d16
1128; HARD-NEXT:    bl test_v2i32_v4i16_helper
1129; HARD-NEXT:    vrev64.16 d16, d0
1130; HARD-NEXT:    vadd.i32 d16, d16, d16
1131; HARD-NEXT:    vrev64.32 d16, d16
1132; HARD-NEXT:    vstr d16, [r4]
1133; HARD-NEXT:    pop {r4, pc}
; Pass <4 x i16> to the helper, receive <2 x i32>: vrev64.16 fixups wrap the
; i16 add before the call and vrev64.32 fixups wrap the i32 add after it.
1134    %1 = load <4 x i16>, ptr %p
1135    %2 = add <4 x i16> %1, %1
1136    %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
1137    %4 = add <2 x i32> %3, %3
1138    store <2 x i32> %4, ptr %q
1139    ret void
1140}
1141
1142declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
1143define void @test_v2i32_v8i8(ptr %p, ptr %q) {
1144; SOFT-LABEL: test_v2i32_v8i8:
1145; SOFT:       @ %bb.0:
1146; SOFT-NEXT:    .save {r4, lr}
1147; SOFT-NEXT:    push {r4, lr}
1148; SOFT-NEXT:    vldr d16, [r0]
1149; SOFT-NEXT:    mov r4, r1
1150; SOFT-NEXT:    vrev64.8 d16, d16
1151; SOFT-NEXT:    vadd.i8 d16, d16, d16
1152; SOFT-NEXT:    vrev64.8 d16, d16
1153; SOFT-NEXT:    vmov r1, r0, d16
1154; SOFT-NEXT:    bl test_v2i32_v8i8_helper
1155; SOFT-NEXT:    vmov d16, r1, r0
1156; SOFT-NEXT:    vrev64.32 d16, d16
1157; SOFT-NEXT:    vadd.i32 d16, d16, d16
1158; SOFT-NEXT:    vrev64.32 d16, d16
1159; SOFT-NEXT:    vstr d16, [r4]
1160; SOFT-NEXT:    pop {r4, pc}
1161;
1162; HARD-LABEL: test_v2i32_v8i8:
1163; HARD:       @ %bb.0:
1164; HARD-NEXT:    .save {r4, lr}
1165; HARD-NEXT:    push {r4, lr}
1166; HARD-NEXT:    vldr d16, [r0]
1167; HARD-NEXT:    mov r4, r1
1168; HARD-NEXT:    vrev64.8 d16, d16
1169; HARD-NEXT:    vadd.i8 d16, d16, d16
1170; HARD-NEXT:    vrev64.8 d0, d16
1171; HARD-NEXT:    bl test_v2i32_v8i8_helper
1172; HARD-NEXT:    vrev64.32 d16, d0
1173; HARD-NEXT:    vadd.i32 d16, d16, d16
1174; HARD-NEXT:    vrev64.32 d16, d16
1175; HARD-NEXT:    vstr d16, [r4]
1176; HARD-NEXT:    pop {r4, pc}
; Pass <8 x i8> to the helper, receive <2 x i32>: vrev64.8 fixups wrap the i8
; add before the call and vrev64.32 fixups wrap the i32 add after it.
1177    %1 = load <8 x i8>, ptr %p
1178    %2 = add <8 x i8> %1, %1
1179    %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
1180    %4 = add <2 x i32> %3, %3
1181    store <2 x i32> %4, ptr %q
1182    ret void
1183}
1184
1185declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
1186define void @test_v4i16_i64(ptr %p, ptr %q) {
1187; SOFT-LABEL: test_v4i16_i64:
1188; SOFT:       @ %bb.0:
1189; SOFT-NEXT:    .save {r4, lr}
1190; SOFT-NEXT:    push {r4, lr}
1191; SOFT-NEXT:    mov r4, r1
1192; SOFT-NEXT:    ldrd r0, r1, [r0]
1193; SOFT-NEXT:    adds r1, r1, r1
1194; SOFT-NEXT:    adc r0, r0, r0
1195; SOFT-NEXT:    bl test_v4i16_i64_helper
1196; SOFT-NEXT:    vmov d16, r1, r0
1197; SOFT-NEXT:    vrev64.16 d16, d16
1198; SOFT-NEXT:    vadd.i16 d16, d16, d16
1199; SOFT-NEXT:    vrev64.16 d16, d16
1200; SOFT-NEXT:    vstr d16, [r4]
1201; SOFT-NEXT:    pop {r4, pc}
1202;
1203; HARD-LABEL: test_v4i16_i64:
1204; HARD:       @ %bb.0:
1205; HARD-NEXT:    .save {r4, lr}
1206; HARD-NEXT:    push {r4, lr}
1207; HARD-NEXT:    mov r4, r1
1208; HARD-NEXT:    ldrd r0, r1, [r0]
1209; HARD-NEXT:    adds r1, r1, r1
1210; HARD-NEXT:    adc r0, r0, r0
1211; HARD-NEXT:    bl test_v4i16_i64_helper
1212; HARD-NEXT:    vrev64.16 d16, d0
1213; HARD-NEXT:    vadd.i16 d16, d16, d16
1214; HARD-NEXT:    vrev64.16 d16, d16
1215; HARD-NEXT:    vstr d16, [r4]
1216; HARD-NEXT:    pop {r4, pc}
; Pass i64 to the helper, receive <4 x i16>: the i64 is doubled in GPRs
; (high word in r0) and the returned vector gets vrev64.16 fixups around
; its add.
1217    %1 = load i64, ptr %p
1218    %2 = add i64 %1, %1
1219    %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
1220    %4 = add <4 x i16> %3, %3
1221    store <4 x i16> %4, ptr %q
1222    ret void
1223}
1224
1225declare <4 x i16> @test_v4i16_f64_helper(double %p)
1226define void @test_v4i16_f64(ptr %p, ptr %q) {
1227; SOFT-LABEL: test_v4i16_f64:
1228; SOFT:       @ %bb.0:
1229; SOFT-NEXT:    .save {r4, lr}
1230; SOFT-NEXT:    push {r4, lr}
1231; SOFT-NEXT:    vldr d16, [r0]
1232; SOFT-NEXT:    mov r4, r1
1233; SOFT-NEXT:    vadd.f64 d16, d16, d16
1234; SOFT-NEXT:    vmov r1, r0, d16
1235; SOFT-NEXT:    bl test_v4i16_f64_helper
1236; SOFT-NEXT:    vmov d16, r1, r0
1237; SOFT-NEXT:    vrev64.16 d16, d16
1238; SOFT-NEXT:    vadd.i16 d16, d16, d16
1239; SOFT-NEXT:    vrev64.16 d16, d16
1240; SOFT-NEXT:    vstr d16, [r4]
1241; SOFT-NEXT:    pop {r4, pc}
1242;
1243; HARD-LABEL: test_v4i16_f64:
1244; HARD:       @ %bb.0:
1245; HARD-NEXT:    .save {r4, lr}
1246; HARD-NEXT:    push {r4, lr}
1247; HARD-NEXT:    vldr d16, [r0]
1248; HARD-NEXT:    mov r4, r1
1249; HARD-NEXT:    vadd.f64 d0, d16, d16
1250; HARD-NEXT:    bl test_v4i16_f64_helper
1251; HARD-NEXT:    vrev64.16 d16, d0
1252; HARD-NEXT:    vadd.i16 d16, d16, d16
1253; HARD-NEXT:    vrev64.16 d16, d16
1254; HARD-NEXT:    vstr d16, [r4]
1255; HARD-NEXT:    pop {r4, pc}
; Pass double to the helper, receive <4 x i16>: f64 needs no lane swap; the
; returned vector gets vrev64.16 fixups around its add.
1256    %1 = load double, ptr %p
1257    %2 = fadd double %1, %1
1258    %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
1259    %4 = add <4 x i16> %3, %3
1260    store <4 x i16> %4, ptr %q
1261    ret void
1262}
1263
1264declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
1265define void @test_v4i16_v1i64(ptr %p, ptr %q) {
1266; SOFT-LABEL: test_v4i16_v1i64:
1267; SOFT:       @ %bb.0:
1268; SOFT-NEXT:    .save {r4, lr}
1269; SOFT-NEXT:    push {r4, lr}
1270; SOFT-NEXT:    vldr d16, [r0]
1271; SOFT-NEXT:    mov r4, r1
1272; SOFT-NEXT:    vadd.i64 d16, d16, d16
1273; SOFT-NEXT:    vmov r1, r0, d16
1274; SOFT-NEXT:    bl test_v4i16_v1i64_helper
1275; SOFT-NEXT:    vmov d16, r1, r0
1276; SOFT-NEXT:    vrev64.16 d16, d16
1277; SOFT-NEXT:    vadd.i16 d16, d16, d16
1278; SOFT-NEXT:    vrev64.16 d16, d16
1279; SOFT-NEXT:    vstr d16, [r4]
1280; SOFT-NEXT:    pop {r4, pc}
1281;
1282; HARD-LABEL: test_v4i16_v1i64:
1283; HARD:       @ %bb.0:
1284; HARD-NEXT:    .save {r4, lr}
1285; HARD-NEXT:    push {r4, lr}
1286; HARD-NEXT:    vldr d16, [r0]
1287; HARD-NEXT:    mov r4, r1
1288; HARD-NEXT:    vadd.i64 d0, d16, d16
1289; HARD-NEXT:    bl test_v4i16_v1i64_helper
1290; HARD-NEXT:    vrev64.16 d16, d0
1291; HARD-NEXT:    vadd.i16 d16, d16, d16
1292; HARD-NEXT:    vrev64.16 d16, d16
1293; HARD-NEXT:    vstr d16, [r4]
1294; HARD-NEXT:    pop {r4, pc}
; Pass <1 x i64> to the helper, receive <4 x i16>: the single-lane i64 needs
; no vrev; the returned vector gets vrev64.16 fixups around its add.
1295    %1 = load <1 x i64>, ptr %p
1296    %2 = add <1 x i64> %1, %1
1297    %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
1298    %4 = add <4 x i16> %3, %3
1299    store <4 x i16> %4, ptr %q
1300    ret void
1301}
1302
1303declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
1304define void @test_v4i16_v2f32(ptr %p, ptr %q) {
1305; SOFT-LABEL: test_v4i16_v2f32:
1306; SOFT:       @ %bb.0:
1307; SOFT-NEXT:    .save {r4, lr}
1308; SOFT-NEXT:    push {r4, lr}
1309; SOFT-NEXT:    vldr d16, [r0]
1310; SOFT-NEXT:    mov r4, r1
1311; SOFT-NEXT:    vrev64.32 d16, d16
1312; SOFT-NEXT:    vadd.f32 d16, d16, d16
1313; SOFT-NEXT:    vrev64.32 d16, d16
1314; SOFT-NEXT:    vmov r1, r0, d16
1315; SOFT-NEXT:    bl test_v4i16_v2f32_helper
1316; SOFT-NEXT:    vmov d16, r1, r0
1317; SOFT-NEXT:    vrev64.16 d16, d16
1318; SOFT-NEXT:    vadd.i16 d16, d16, d16
1319; SOFT-NEXT:    vrev64.16 d16, d16
1320; SOFT-NEXT:    vstr d16, [r4]
1321; SOFT-NEXT:    pop {r4, pc}
1322;
1323; HARD-LABEL: test_v4i16_v2f32:
1324; HARD:       @ %bb.0:
1325; HARD-NEXT:    .save {r4, lr}
1326; HARD-NEXT:    push {r4, lr}
1327; HARD-NEXT:    vldr d16, [r0]
1328; HARD-NEXT:    mov r4, r1
1329; HARD-NEXT:    vrev64.32 d16, d16
1330; HARD-NEXT:    vadd.f32 d16, d16, d16
1331; HARD-NEXT:    vrev64.32 d0, d16
1332; HARD-NEXT:    bl test_v4i16_v2f32_helper
1333; HARD-NEXT:    vrev64.16 d16, d0
1334; HARD-NEXT:    vadd.i16 d16, d16, d16
1335; HARD-NEXT:    vrev64.16 d16, d16
1336; HARD-NEXT:    vstr d16, [r4]
1337; HARD-NEXT:    pop {r4, pc}
; Pass <2 x float> to the helper, receive <4 x i16>: vrev64.32 fixups wrap the
; f32 add before the call and vrev64.16 fixups wrap the i16 add after it.
1338    %1 = load <2 x float>, ptr %p
1339    %2 = fadd <2 x float> %1, %1
1340    %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
1341    %4 = add <4 x i16> %3, %3
1342    store <4 x i16> %4, ptr %q
1343    ret void
1344}
1345
1346declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
1347define void @test_v4i16_v2i32(ptr %p, ptr %q) {
1348; SOFT-LABEL: test_v4i16_v2i32:
1349; SOFT:       @ %bb.0:
1350; SOFT-NEXT:    .save {r4, lr}
1351; SOFT-NEXT:    push {r4, lr}
1352; SOFT-NEXT:    vldr d16, [r0]
1353; SOFT-NEXT:    mov r4, r1
1354; SOFT-NEXT:    vrev64.32 d16, d16
1355; SOFT-NEXT:    vadd.i32 d16, d16, d16
1356; SOFT-NEXT:    vrev64.32 d16, d16
1357; SOFT-NEXT:    vmov r1, r0, d16
1358; SOFT-NEXT:    bl test_v4i16_v2i32_helper
1359; SOFT-NEXT:    vmov d16, r1, r0
1360; SOFT-NEXT:    vrev64.16 d16, d16
1361; SOFT-NEXT:    vadd.i16 d16, d16, d16
1362; SOFT-NEXT:    vrev64.16 d16, d16
1363; SOFT-NEXT:    vstr d16, [r4]
1364; SOFT-NEXT:    pop {r4, pc}
1365;
1366; HARD-LABEL: test_v4i16_v2i32:
1367; HARD:       @ %bb.0:
1368; HARD-NEXT:    .save {r4, lr}
1369; HARD-NEXT:    push {r4, lr}
1370; HARD-NEXT:    vldr d16, [r0]
1371; HARD-NEXT:    mov r4, r1
1372; HARD-NEXT:    vrev64.32 d16, d16
1373; HARD-NEXT:    vadd.i32 d16, d16, d16
1374; HARD-NEXT:    vrev64.32 d0, d16
1375; HARD-NEXT:    bl test_v4i16_v2i32_helper
1376; HARD-NEXT:    vrev64.16 d16, d0
1377; HARD-NEXT:    vadd.i16 d16, d16, d16
1378; HARD-NEXT:    vrev64.16 d16, d16
1379; HARD-NEXT:    vstr d16, [r4]
1380; HARD-NEXT:    pop {r4, pc}
; Pass <2 x i32> to the helper, receive <4 x i16>: vrev64.32 fixups wrap the
; i32 add before the call and vrev64.16 fixups wrap the i16 add after it.
1381    %1 = load <2 x i32>, ptr %p
1382    %2 = add <2 x i32> %1, %1
1383    %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
1384    %4 = add <4 x i16> %3, %3
1385    store <4 x i16> %4, ptr %q
1386    ret void
1387}
1388
1389declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
1390define void @test_v4i16_v8i8(ptr %p, ptr %q) {
1391; SOFT-LABEL: test_v4i16_v8i8:
1392; SOFT:       @ %bb.0:
1393; SOFT-NEXT:    .save {r4, lr}
1394; SOFT-NEXT:    push {r4, lr}
1395; SOFT-NEXT:    vldr d16, [r0]
1396; SOFT-NEXT:    mov r4, r1
1397; SOFT-NEXT:    vrev64.8 d16, d16
1398; SOFT-NEXT:    vadd.i8 d16, d16, d16
1399; SOFT-NEXT:    vrev64.8 d16, d16
1400; SOFT-NEXT:    vmov r1, r0, d16
1401; SOFT-NEXT:    bl test_v4i16_v8i8_helper
1402; SOFT-NEXT:    vmov d16, r1, r0
1403; SOFT-NEXT:    vrev64.16 d16, d16
1404; SOFT-NEXT:    vadd.i16 d16, d16, d16
1405; SOFT-NEXT:    vrev64.16 d16, d16
1406; SOFT-NEXT:    vstr d16, [r4]
1407; SOFT-NEXT:    pop {r4, pc}
1408;
1409; HARD-LABEL: test_v4i16_v8i8:
1410; HARD:       @ %bb.0:
1411; HARD-NEXT:    .save {r4, lr}
1412; HARD-NEXT:    push {r4, lr}
1413; HARD-NEXT:    vldr d16, [r0]
1414; HARD-NEXT:    mov r4, r1
1415; HARD-NEXT:    vrev64.8 d16, d16
1416; HARD-NEXT:    vadd.i8 d16, d16, d16
1417; HARD-NEXT:    vrev64.8 d0, d16
1418; HARD-NEXT:    bl test_v4i16_v8i8_helper
1419; HARD-NEXT:    vrev64.16 d16, d0
1420; HARD-NEXT:    vadd.i16 d16, d16, d16
1421; HARD-NEXT:    vrev64.16 d16, d16
1422; HARD-NEXT:    vstr d16, [r4]
1423; HARD-NEXT:    pop {r4, pc}
; Pass <8 x i8> to the helper, receive <4 x i16>: vrev64.8 fixups wrap the i8
; add before the call and vrev64.16 fixups wrap the i16 add after it.
1424    %1 = load <8 x i8>, ptr %p
1425    %2 = add <8 x i8> %1, %1
1426    %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
1427    %4 = add <4 x i16> %3, %3
1428    store <4 x i16> %4, ptr %q
1429    ret void
1430}
1431
1432declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
1433define void @test_v8i8_i64(ptr %p, ptr %q) {
1434; SOFT-LABEL: test_v8i8_i64:
1435; SOFT:       @ %bb.0:
1436; SOFT-NEXT:    .save {r4, lr}
1437; SOFT-NEXT:    push {r4, lr}
1438; SOFT-NEXT:    mov r4, r1
1439; SOFT-NEXT:    ldrd r0, r1, [r0]
1440; SOFT-NEXT:    adds r1, r1, r1
1441; SOFT-NEXT:    adc r0, r0, r0
1442; SOFT-NEXT:    bl test_v8i8_i64_helper
1443; SOFT-NEXT:    vmov d16, r1, r0
1444; SOFT-NEXT:    vrev64.8 d16, d16
1445; SOFT-NEXT:    vadd.i8 d16, d16, d16
1446; SOFT-NEXT:    vrev64.8 d16, d16
1447; SOFT-NEXT:    vstr d16, [r4]
1448; SOFT-NEXT:    pop {r4, pc}
1449;
1450; HARD-LABEL: test_v8i8_i64:
1451; HARD:       @ %bb.0:
1452; HARD-NEXT:    .save {r4, lr}
1453; HARD-NEXT:    push {r4, lr}
1454; HARD-NEXT:    mov r4, r1
1455; HARD-NEXT:    ldrd r0, r1, [r0]
1456; HARD-NEXT:    adds r1, r1, r1
1457; HARD-NEXT:    adc r0, r0, r0
1458; HARD-NEXT:    bl test_v8i8_i64_helper
1459; HARD-NEXT:    vrev64.8 d16, d0
1460; HARD-NEXT:    vadd.i8 d16, d16, d16
1461; HARD-NEXT:    vrev64.8 d16, d16
1462; HARD-NEXT:    vstr d16, [r4]
1463; HARD-NEXT:    pop {r4, pc}
; Pass i64 to the helper, receive <8 x i8>: the i64 is doubled in GPRs
; (high word in r0) and the returned vector gets vrev64.8 fixups around
; its add.
1464    %1 = load i64, ptr %p
1465    %2 = add i64 %1, %1
1466    %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
1467    %4 = add <8 x i8> %3, %3
1468    store <8 x i8> %4, ptr %q
1469    ret void
1470}
1471
1472declare <8 x i8> @test_v8i8_f64_helper(double %p)
1473define void @test_v8i8_f64(ptr %p, ptr %q) {
1474; SOFT-LABEL: test_v8i8_f64:
1475; SOFT:       @ %bb.0:
1476; SOFT-NEXT:    .save {r4, lr}
1477; SOFT-NEXT:    push {r4, lr}
1478; SOFT-NEXT:    vldr d16, [r0]
1479; SOFT-NEXT:    mov r4, r1
1480; SOFT-NEXT:    vadd.f64 d16, d16, d16
1481; SOFT-NEXT:    vmov r1, r0, d16
1482; SOFT-NEXT:    bl test_v8i8_f64_helper
1483; SOFT-NEXT:    vmov d16, r1, r0
1484; SOFT-NEXT:    vrev64.8 d16, d16
1485; SOFT-NEXT:    vadd.i8 d16, d16, d16
1486; SOFT-NEXT:    vrev64.8 d16, d16
1487; SOFT-NEXT:    vstr d16, [r4]
1488; SOFT-NEXT:    pop {r4, pc}
1489;
1490; HARD-LABEL: test_v8i8_f64:
1491; HARD:       @ %bb.0:
1492; HARD-NEXT:    .save {r4, lr}
1493; HARD-NEXT:    push {r4, lr}
1494; HARD-NEXT:    vldr d16, [r0]
1495; HARD-NEXT:    mov r4, r1
1496; HARD-NEXT:    vadd.f64 d0, d16, d16
1497; HARD-NEXT:    bl test_v8i8_f64_helper
1498; HARD-NEXT:    vrev64.8 d16, d0
1499; HARD-NEXT:    vadd.i8 d16, d16, d16
1500; HARD-NEXT:    vrev64.8 d16, d16
1501; HARD-NEXT:    vstr d16, [r4]
1502; HARD-NEXT:    pop {r4, pc}
; Pass double to the helper, receive <8 x i8>: f64 needs no lane swap; the
; returned vector gets vrev64.8 fixups around its add.
1503    %1 = load double, ptr %p
1504    %2 = fadd double %1, %1
1505    %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
1506    %4 = add <8 x i8> %3, %3
1507    store <8 x i8> %4, ptr %q
1508    ret void
1509}
1510
1511declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
1512define void @test_v8i8_v1i64(ptr %p, ptr %q) {
1513; SOFT-LABEL: test_v8i8_v1i64:
1514; SOFT:       @ %bb.0:
1515; SOFT-NEXT:    .save {r4, lr}
1516; SOFT-NEXT:    push {r4, lr}
1517; SOFT-NEXT:    vldr d16, [r0]
1518; SOFT-NEXT:    mov r4, r1
1519; SOFT-NEXT:    vadd.i64 d16, d16, d16
1520; SOFT-NEXT:    vmov r1, r0, d16
1521; SOFT-NEXT:    bl test_v8i8_v1i64_helper
1522; SOFT-NEXT:    vmov d16, r1, r0
1523; SOFT-NEXT:    vrev64.8 d16, d16
1524; SOFT-NEXT:    vadd.i8 d16, d16, d16
1525; SOFT-NEXT:    vrev64.8 d16, d16
1526; SOFT-NEXT:    vstr d16, [r4]
1527; SOFT-NEXT:    pop {r4, pc}
1528;
1529; HARD-LABEL: test_v8i8_v1i64:
1530; HARD:       @ %bb.0:
1531; HARD-NEXT:    .save {r4, lr}
1532; HARD-NEXT:    push {r4, lr}
1533; HARD-NEXT:    vldr d16, [r0]
1534; HARD-NEXT:    mov r4, r1
1535; HARD-NEXT:    vadd.i64 d0, d16, d16
1536; HARD-NEXT:    bl test_v8i8_v1i64_helper
1537; HARD-NEXT:    vrev64.8 d16, d0
1538; HARD-NEXT:    vadd.i8 d16, d16, d16
1539; HARD-NEXT:    vrev64.8 d16, d16
1540; HARD-NEXT:    vstr d16, [r4]
1541; HARD-NEXT:    pop {r4, pc}
; Pass <1 x i64> to the helper, receive <8 x i8>: the single-lane i64 needs
; no vrev; the returned vector gets vrev64.8 fixups around its add.
1542    %1 = load <1 x i64>, ptr %p
1543    %2 = add <1 x i64> %1, %1
1544    %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
1545    %4 = add <8 x i8> %3, %3
1546    store <8 x i8> %4, ptr %q
1547    ret void
1548}
1549
1550declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
1551define void @test_v8i8_v2f32(ptr %p, ptr %q) {
1552; SOFT-LABEL: test_v8i8_v2f32:
1553; SOFT:       @ %bb.0:
1554; SOFT-NEXT:    .save {r4, lr}
1555; SOFT-NEXT:    push {r4, lr}
1556; SOFT-NEXT:    vldr d16, [r0]
1557; SOFT-NEXT:    mov r4, r1
1558; SOFT-NEXT:    vrev64.32 d16, d16
1559; SOFT-NEXT:    vadd.f32 d16, d16, d16
1560; SOFT-NEXT:    vrev64.32 d16, d16
1561; SOFT-NEXT:    vmov r1, r0, d16
1562; SOFT-NEXT:    bl test_v8i8_v2f32_helper
1563; SOFT-NEXT:    vmov d16, r1, r0
1564; SOFT-NEXT:    vrev64.8 d16, d16
1565; SOFT-NEXT:    vadd.i8 d16, d16, d16
1566; SOFT-NEXT:    vrev64.8 d16, d16
1567; SOFT-NEXT:    vstr d16, [r4]
1568; SOFT-NEXT:    pop {r4, pc}
1569;
1570; HARD-LABEL: test_v8i8_v2f32:
1571; HARD:       @ %bb.0:
1572; HARD-NEXT:    .save {r4, lr}
1573; HARD-NEXT:    push {r4, lr}
1574; HARD-NEXT:    vldr d16, [r0]
1575; HARD-NEXT:    mov r4, r1
1576; HARD-NEXT:    vrev64.32 d16, d16
1577; HARD-NEXT:    vadd.f32 d16, d16, d16
1578; HARD-NEXT:    vrev64.32 d0, d16
1579; HARD-NEXT:    bl test_v8i8_v2f32_helper
1580; HARD-NEXT:    vrev64.8 d16, d0
1581; HARD-NEXT:    vadd.i8 d16, d16, d16
1582; HARD-NEXT:    vrev64.8 d16, d16
1583; HARD-NEXT:    vstr d16, [r4]
1584; HARD-NEXT:    pop {r4, pc}
; Pass <2 x float> to the helper, receive <8 x i8>: vrev64.32 fixups wrap the
; f32 add before the call and vrev64.8 fixups wrap the i8 add after it.
1585    %1 = load <2 x float>, ptr %p
1586    %2 = fadd <2 x float> %1, %1
1587    %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
1588    %4 = add <8 x i8> %3, %3
1589    store <8 x i8> %4, ptr %q
1590    ret void
1591}
1592
1593declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
1594define void @test_v8i8_v2i32(ptr %p, ptr %q) {
1595; SOFT-LABEL: test_v8i8_v2i32:
1596; SOFT:       @ %bb.0:
1597; SOFT-NEXT:    .save {r4, lr}
1598; SOFT-NEXT:    push {r4, lr}
1599; SOFT-NEXT:    vldr d16, [r0]
1600; SOFT-NEXT:    mov r4, r1
1601; SOFT-NEXT:    vrev64.32 d16, d16
1602; SOFT-NEXT:    vadd.i32 d16, d16, d16
1603; SOFT-NEXT:    vrev64.32 d16, d16
1604; SOFT-NEXT:    vmov r1, r0, d16
1605; SOFT-NEXT:    bl test_v8i8_v2i32_helper
1606; SOFT-NEXT:    vmov d16, r1, r0
1607; SOFT-NEXT:    vrev64.8 d16, d16
1608; SOFT-NEXT:    vadd.i8 d16, d16, d16
1609; SOFT-NEXT:    vrev64.8 d16, d16
1610; SOFT-NEXT:    vstr d16, [r4]
1611; SOFT-NEXT:    pop {r4, pc}
1612;
1613; HARD-LABEL: test_v8i8_v2i32:
1614; HARD:       @ %bb.0:
1615; HARD-NEXT:    .save {r4, lr}
1616; HARD-NEXT:    push {r4, lr}
1617; HARD-NEXT:    vldr d16, [r0]
1618; HARD-NEXT:    mov r4, r1
1619; HARD-NEXT:    vrev64.32 d16, d16
1620; HARD-NEXT:    vadd.i32 d16, d16, d16
1621; HARD-NEXT:    vrev64.32 d0, d16
1622; HARD-NEXT:    bl test_v8i8_v2i32_helper
1623; HARD-NEXT:    vrev64.8 d16, d0
1624; HARD-NEXT:    vadd.i8 d16, d16, d16
1625; HARD-NEXT:    vrev64.8 d16, d16
1626; HARD-NEXT:    vstr d16, [r4]
1627; HARD-NEXT:    pop {r4, pc}
; Pass <2 x i32> to the helper, receive <8 x i8>: vrev64.32 fixups wrap the
; i32 add before the call and vrev64.8 fixups wrap the i8 add after it.
1628    %1 = load <2 x i32>, ptr %p
1629    %2 = add <2 x i32> %1, %1
1630    %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
1631    %4 = add <8 x i8> %3, %3
1632    store <8 x i8> %4, ptr %q
1633    ret void
1634}
1635
1636declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
1637define void @test_v8i8_v4i16(ptr %p, ptr %q) {
1638; SOFT-LABEL: test_v8i8_v4i16:
1639; SOFT:       @ %bb.0:
1640; SOFT-NEXT:    .save {r4, lr}
1641; SOFT-NEXT:    push {r4, lr}
1642; SOFT-NEXT:    vldr d16, [r0]
1643; SOFT-NEXT:    mov r4, r1
1644; SOFT-NEXT:    vrev64.16 d16, d16
1645; SOFT-NEXT:    vadd.i16 d16, d16, d16
1646; SOFT-NEXT:    vrev64.16 d16, d16
1647; SOFT-NEXT:    vmov r1, r0, d16
1648; SOFT-NEXT:    bl test_v8i8_v4i16_helper
1649; SOFT-NEXT:    vmov d16, r1, r0
1650; SOFT-NEXT:    vrev64.8 d16, d16
1651; SOFT-NEXT:    vadd.i8 d16, d16, d16
1652; SOFT-NEXT:    vrev64.8 d16, d16
1653; SOFT-NEXT:    vstr d16, [r4]
1654; SOFT-NEXT:    pop {r4, pc}
1655;
1656; HARD-LABEL: test_v8i8_v4i16:
1657; HARD:       @ %bb.0:
1658; HARD-NEXT:    .save {r4, lr}
1659; HARD-NEXT:    push {r4, lr}
1660; HARD-NEXT:    vldr d16, [r0]
1661; HARD-NEXT:    mov r4, r1
1662; HARD-NEXT:    vrev64.16 d16, d16
1663; HARD-NEXT:    vadd.i16 d16, d16, d16
1664; HARD-NEXT:    vrev64.16 d0, d16
1665; HARD-NEXT:    bl test_v8i8_v4i16_helper
1666; HARD-NEXT:    vrev64.8 d16, d0
1667; HARD-NEXT:    vadd.i8 d16, d16, d16
1668; HARD-NEXT:    vrev64.8 d16, d16
1669; HARD-NEXT:    vstr d16, [r4]
1670; HARD-NEXT:    pop {r4, pc}
; Pass <4 x i16> to the helper, receive <8 x i8>: vrev64.16 fixups wrap the
; i16 add before the call and vrev64.8 fixups wrap the i8 add after it.
1671    %1 = load <4 x i16>, ptr %p
1672    %2 = add <4 x i16> %1, %1
1673    %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
1674    %4 = add <8 x i8> %3, %3
1675    store <8 x i8> %4, ptr %q
1676    ret void
1677}
1678
1679declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
1680define void @test_f128_v2f64(ptr %p, ptr %q) {
1681; SOFT-LABEL: test_f128_v2f64:
1682; SOFT:       @ %bb.0:
1683; SOFT-NEXT:    .save {r4, lr}
1684; SOFT-NEXT:    push {r4, lr}
1685; SOFT-NEXT:    .pad #16
1686; SOFT-NEXT:    sub sp, sp, #16
1687; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
1688; SOFT-NEXT:    mov r4, r1
1689; SOFT-NEXT:    vadd.f64 d18, d16, d16
1690; SOFT-NEXT:    vadd.f64 d16, d17, d17
1691; SOFT-NEXT:    vmov r1, r0, d18
1692; SOFT-NEXT:    vmov r3, r2, d16
1693; SOFT-NEXT:    bl test_f128_v2f64_helper
1694; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
1695; SOFT-NEXT:    bl __addtf3
1696; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
1697; SOFT-NEXT:    add sp, sp, #16
1698; SOFT-NEXT:    pop {r4, pc}
1699;
1700; HARD-LABEL: test_f128_v2f64:
1701; HARD:       @ %bb.0:
1702; HARD-NEXT:    .save {r4, lr}
1703; HARD-NEXT:    push {r4, lr}
1704; HARD-NEXT:    .pad #16
1705; HARD-NEXT:    sub sp, sp, #16
1706; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
1707; HARD-NEXT:    mov r4, r1
1708; HARD-NEXT:    vadd.f64 d1, d17, d17
1709; HARD-NEXT:    vadd.f64 d0, d16, d16
1710; HARD-NEXT:    bl test_f128_v2f64_helper
1711; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
1712; HARD-NEXT:    bl __addtf3
1713; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
1714; HARD-NEXT:    add sp, sp, #16
1715; HARD-NEXT:    pop {r4, pc}
; Pass <2 x double> to the helper, receive fp128: the two f64 lanes need no
; vrev (SOFT splits them into r0-r3, HARD uses d0/d1); the fp128 return comes
; back in r0-r3, is doubled via a libcall to __addtf3 with one operand spilled
; to the 16-byte stack slot, and is stored with stm.
1716    %1 = load <2 x double>, ptr %p
1717    %2 = fadd <2 x double> %1, %1
1718    %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
1719    %4 = fadd fp128 %3, %3
1720    store fp128 %4, ptr %q
1721    ret void
1722}
1723
1724declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
1725define void @test_f128_v2i64(ptr %p, ptr %q) {
1726; SOFT-LABEL: test_f128_v2i64:
1727; SOFT:       @ %bb.0:
1728; SOFT-NEXT:    .save {r4, lr}
1729; SOFT-NEXT:    push {r4, lr}
1730; SOFT-NEXT:    .pad #16
1731; SOFT-NEXT:    sub sp, sp, #16
1732; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
1733; SOFT-NEXT:    mov r4, r1
1734; SOFT-NEXT:    vadd.i64 q8, q8, q8
1735; SOFT-NEXT:    vmov r1, r0, d16
1736; SOFT-NEXT:    vmov r3, r2, d17
1737; SOFT-NEXT:    bl test_f128_v2i64_helper
1738; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
1739; SOFT-NEXT:    bl __addtf3
1740; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
1741; SOFT-NEXT:    add sp, sp, #16
1742; SOFT-NEXT:    pop {r4, pc}
1743;
1744; HARD-LABEL: test_f128_v2i64:
1745; HARD:       @ %bb.0:
1746; HARD-NEXT:    .save {r4, lr}
1747; HARD-NEXT:    push {r4, lr}
1748; HARD-NEXT:    .pad #16
1749; HARD-NEXT:    sub sp, sp, #16
1750; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
1751; HARD-NEXT:    mov r4, r1
1752; HARD-NEXT:    vadd.i64 q0, q8, q8
1753; HARD-NEXT:    bl test_f128_v2i64_helper
1754; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
1755; HARD-NEXT:    bl __addtf3
1756; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
1757; HARD-NEXT:    add sp, sp, #16
1758; HARD-NEXT:    pop {r4, pc}
; Pass <2 x i64> to the helper, receive fp128: 64-bit lanes need no vrev
; (SOFT splits q8 into r0-r3, HARD passes q0); the fp128 return in r0-r3 is
; doubled via __addtf3 with a stack-spilled operand.
1759    %1 = load <2 x i64>, ptr %p
1760    %2 = add <2 x i64> %1, %1
1761    %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
1762    %4 = fadd fp128 %3, %3
1763    store fp128 %4, ptr %q
1764    ret void
1765}
1766
1767declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
1768define void @test_f128_v4f32(ptr %p, ptr %q) {
1769; SOFT-LABEL: test_f128_v4f32:
1770; SOFT:       @ %bb.0:
1771; SOFT-NEXT:    .save {r4, lr}
1772; SOFT-NEXT:    push {r4, lr}
1773; SOFT-NEXT:    .pad #16
1774; SOFT-NEXT:    sub sp, sp, #16
1775; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
1776; SOFT-NEXT:    mov r4, r1
1777; SOFT-NEXT:    vrev64.32 q8, q8
1778; SOFT-NEXT:    vadd.f32 q8, q8, q8
1779; SOFT-NEXT:    vrev64.32 q8, q8
1780; SOFT-NEXT:    vmov r1, r0, d16
1781; SOFT-NEXT:    vmov r3, r2, d17
1782; SOFT-NEXT:    bl test_f128_v4f32_helper
1783; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
1784; SOFT-NEXT:    bl __addtf3
1785; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
1786; SOFT-NEXT:    add sp, sp, #16
1787; SOFT-NEXT:    pop {r4, pc}
1788;
1789; HARD-LABEL: test_f128_v4f32:
1790; HARD:       @ %bb.0:
1791; HARD-NEXT:    .save {r4, lr}
1792; HARD-NEXT:    push {r4, lr}
1793; HARD-NEXT:    .pad #16
1794; HARD-NEXT:    sub sp, sp, #16
1795; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
1796; HARD-NEXT:    mov r4, r1
1797; HARD-NEXT:    vrev64.32 q8, q8
1798; HARD-NEXT:    vadd.f32 q8, q8, q8
1799; HARD-NEXT:    vrev64.32 q0, q8
1800; HARD-NEXT:    bl test_f128_v4f32_helper
1801; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
1802; HARD-NEXT:    bl __addtf3
1803; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
1804; HARD-NEXT:    add sp, sp, #16
1805; HARD-NEXT:    pop {r4, pc}
; Pass <4 x float> to the helper, receive fp128: q-register vrev64.32 fixups
; wrap the f32 add before the call; the fp128 return in r0-r3 is doubled via
; __addtf3 with a stack-spilled operand.
1806    %1 = load <4 x float>, ptr %p
1807    %2 = fadd <4 x float> %1, %1
1808    %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
1809    %4 = fadd fp128 %3, %3
1810    store fp128 %4, ptr %q
1811    ret void
1812}
1813
1814declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
1815define void @test_f128_v4i32(ptr %p, ptr %q) {
1816; SOFT-LABEL: test_f128_v4i32:
1817; SOFT:       @ %bb.0:
1818; SOFT-NEXT:    .save {r4, lr}
1819; SOFT-NEXT:    push {r4, lr}
1820; SOFT-NEXT:    .pad #16
1821; SOFT-NEXT:    sub sp, sp, #16
1822; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
1823; SOFT-NEXT:    mov r4, r1
1824; SOFT-NEXT:    vrev64.32 q8, q8
1825; SOFT-NEXT:    vadd.i32 q8, q8, q8
1826; SOFT-NEXT:    vrev64.32 q8, q8
1827; SOFT-NEXT:    vmov r1, r0, d16
1828; SOFT-NEXT:    vmov r3, r2, d17
1829; SOFT-NEXT:    bl test_f128_v4i32_helper
1830; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
1831; SOFT-NEXT:    bl __addtf3
1832; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
1833; SOFT-NEXT:    add sp, sp, #16
1834; SOFT-NEXT:    pop {r4, pc}
1835;
1836; HARD-LABEL: test_f128_v4i32:
1837; HARD:       @ %bb.0:
1838; HARD-NEXT:    .save {r4, lr}
1839; HARD-NEXT:    push {r4, lr}
1840; HARD-NEXT:    .pad #16
1841; HARD-NEXT:    sub sp, sp, #16
1842; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
1843; HARD-NEXT:    mov r4, r1
1844; HARD-NEXT:    vrev64.32 q8, q8
1845; HARD-NEXT:    vadd.i32 q8, q8, q8
1846; HARD-NEXT:    vrev64.32 q0, q8
1847; HARD-NEXT:    bl test_f128_v4i32_helper
1848; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
1849; HARD-NEXT:    bl __addtf3
1850; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
1851; HARD-NEXT:    add sp, sp, #16
1852; HARD-NEXT:    pop {r4, pc}
; Pass <4 x i32> to the helper, receive fp128: q-register vrev64.32 fixups
; wrap the i32 add before the call; the fp128 return in r0-r3 is doubled via
; __addtf3 with a stack-spilled operand.
1853    %1 = load <4 x i32>, ptr %p
1854    %2 = add <4 x i32> %1, %1
1855    %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
1856    %4 = fadd fp128 %3, %3
1857    store fp128 %4, ptr %q
1858    ret void
1859}
1860
declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
; Same shape as test_f128_v4i32 but with <8 x i16> lanes: vrev64.16 restores
; big-endian lane order around the in-register add before the helper call.
define void @test_f128_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_f128_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_f128_v8i16_helper
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f128_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_f128_v8i16_helper
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i16>, ptr %p
    %2 = add <8 x i16> %1, %1
    %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, ptr %q
    ret void
}
1907
declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
; <16 x i8> variant of the vector-to-fp128 argument test: vrev64.8 handles the
; big-endian byte-lane swap before the vector is handed to the helper.
define void @test_f128_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_f128_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_f128_v16i8_helper
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f128_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_f128_v16i8_helper
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, pc}
    %1 = load <16 x i8>, ptr %p
    %2 = add <16 x i8> %1, %1
    %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, ptr %q
    ret void
}
1954
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
; Reverse direction: an fp128 (doubled via __addtf3, passed in r0-r3 plus a
; 16-byte stack copy) goes in; the returned <2 x double> is doubled per d-reg
; and stored.  SOFT receives the result in r0-r3; HARD receives it in q0.
define void @test_v2f64_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v2f64_f128_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v2f64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v2f64_f128_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
    %1 = load fp128, ptr %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void

}
2006
declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
; <2 x i64> in, <2 x double> out: 64-bit lanes need no vrev on big-endian, so
; only the per-word vmov (SOFT) vs. direct q0 transfer (HARD) is checked.
define void @test_v2f64_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v2i64_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v2f64_v2i64_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i64>, ptr %p
    %2 = add <2 x i64> %1, %1
    %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void
}
2045
declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
; <4 x float> argument to a <2 x double>-returning helper: 32-bit lanes are
; swapped with vrev64.32 around the add before the call on big-endian.
define void @test_v2f64_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v4f32_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v2f64_v4f32_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x float>, ptr %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void
}
2088
declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
; <4 x i32> argument to a <2 x double>-returning helper; identical structure to
; the v4f32 case but with integer lane adds (vadd.i32).
define void @test_v2f64_v4i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v4i32_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v2f64_v4i32_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i32>, ptr %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void
}
2131
declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
; <8 x i16> argument to a <2 x double>-returning helper; vrev64.16 performs the
; big-endian halfword-lane swap around the integer add.
define void @test_v2f64_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v8i16_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_v2f64_v8i16_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i16>, ptr %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void
}
2174
declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
; <16 x i8> argument to a <2 x double>-returning helper; vrev64.8 performs the
; big-endian byte-lane swap around the integer add.
define void @test_v2f64_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v16i8_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v2f64_v16i8_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <16 x i8>, ptr %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, ptr %q
    ret void
}
2217
declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
; fp128 in (r0-r3 plus stack copy, doubled via __addtf3), <2 x i64> out
; (doubled with vadd.i64 — no vrev needed for 64-bit lanes) and stored.
define void @test_v2i64_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v2i64_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v2i64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v2i64_f128_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
    %1 = load fp128, ptr %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2266
declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
; <2 x double> in, <2 x i64> out: each double lane is doubled with vadd.f64;
; SOFT splits the argument across r0-r3 via vmov, HARD builds it in d0/d1.
define void @test_v2i64_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_v2i64_v2f64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i64_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v2i64_v2f64_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x double>, ptr %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2305
declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
; <4 x float> in (vrev64.32 lane swap around the fadd), <2 x i64> out
; (no vrev needed for the 64-bit result lanes).
define void @test_v2i64_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2i64_v4f32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i64_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v2i64_v4f32_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x float>, ptr %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2346
declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
; <4 x i32> in (vrev64.32 swap around vadd.i32), <2 x i64> out (vadd.i64,
; no result-side vrev).
define void @test_v2i64_v4i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2i64_v4i32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i64_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v2i64_v4i32_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i32>, ptr %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2387
declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
; <8 x i16> in (vrev64.16 swap around vadd.i16), <2 x i64> out (vadd.i64,
; no result-side vrev).
define void @test_v2i64_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2i64_v8i16_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i64_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_v2i64_v8i16_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i16>, ptr %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2428
declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
; <16 x i8> in (vrev64.8 swap around vadd.i8), <2 x i64> out (vadd.i64,
; no result-side vrev).
define void @test_v2i64_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i64_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2i64_v16i8_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i64_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v2i64_v16i8_helper
; HARD-NEXT:    vadd.i64 q8, q0, q0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <16 x i8>, ptr %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, ptr %q
    ret void
}
2469
declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
; fp128 in (r0-r3 plus stack copy, doubled via __addtf3), <4 x float> out:
; the returned vector is vrev64.32-swapped, doubled, swapped back, and stored.
define void @test_v4f32_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v4f32_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v4f32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v4f32_f128_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
    %1 = load fp128, ptr %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2522
declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
; <2 x double> in (per-d-reg vadd.f64, no argument-side vrev), <4 x float> out
; (vrev64.32 swap around the result-side vadd.f32).
define void @test_v4f32_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_v4f32_v2f64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4f32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v4f32_v2f64_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x double>, ptr %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2565
declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
; <2 x i64> in (vadd.i64, no argument-side vrev for 64-bit lanes), <4 x float>
; out (vrev64.32 swap around the result-side vadd.f32).
define void @test_v4f32_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4f32_v2i64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4f32_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v4f32_v2i64_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <2 x i64>, ptr %p
    %2 = add <2 x i64> %1, %1
    %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2606
declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
; <4 x i32> in, <4 x float> out: both sides are 32-bit lanes, so vrev64.32
; pairs appear around both the argument add and the result add.
define void @test_v4f32_v4i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4f32_v4i32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4f32_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v4f32_v4i32_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <4 x i32>, ptr %p
    %2 = add <4 x i32> %1, %1
    %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2651
declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
; <8 x i16> in (vrev64.16 swap around vadd.i16), <4 x float> out (vrev64.32
; swap around the result-side vadd.f32).
define void @test_v4f32_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4f32_v8i16_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4f32_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_v4f32_v8i16_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <8 x i16>, ptr %p
    %2 = add <8 x i16> %1, %1
    %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2696
declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
; <16 x i8> in (vrev64.8 swap around vadd.i8), <4 x float> out (vrev64.32
; swap around the result-side vadd.f32).
define void @test_v4f32_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4f32_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4f32_v16i8_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4f32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v4f32_v16i8_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
    %1 = load <16 x i8>, ptr %p
    %2 = add <16 x i8> %1, %1
    %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, ptr %q
    ret void
}
2741
declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
; fp128 in (r0-r3 plus stack copy, doubled via __addtf3), <4 x i32> out:
; the returned vector is vrev64.32-swapped, doubled (vadd.i32), swapped back.
define void @test_v4i32_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v4i32_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v4i32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v4i32_f128_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
    %1 = load fp128, ptr %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, ptr %q
    ret void
}
2794
; Big-endian vector call: pass <2 x double>, receive <4 x i32>. 64-bit
; elements need no lane swap on the argument side; CHECK lines verify the
; vrev64.32 swap on the returned vector in both ABIs.
2795declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
2796define void @test_v4i32_v2f64(ptr %p, ptr %q) {
2797; SOFT-LABEL: test_v4i32_v2f64:
2798; SOFT:       @ %bb.0:
2799; SOFT-NEXT:    .save {r4, lr}
2800; SOFT-NEXT:    push {r4, lr}
2801; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
2802; SOFT-NEXT:    mov r4, r1
2803; SOFT-NEXT:    vadd.f64 d18, d16, d16
2804; SOFT-NEXT:    vadd.f64 d16, d17, d17
2805; SOFT-NEXT:    vmov r1, r0, d18
2806; SOFT-NEXT:    vmov r3, r2, d16
2807; SOFT-NEXT:    bl test_v4i32_v2f64_helper
2808; SOFT-NEXT:    vmov d17, r3, r2
2809; SOFT-NEXT:    vmov d16, r1, r0
2810; SOFT-NEXT:    vrev64.32 q8, q8
2811; SOFT-NEXT:    vadd.i32 q8, q8, q8
2812; SOFT-NEXT:    vrev64.32 q8, q8
2813; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
2814; SOFT-NEXT:    pop {r4, pc}
2815;
2816; HARD-LABEL: test_v4i32_v2f64:
2817; HARD:       @ %bb.0:
2818; HARD-NEXT:    .save {r4, lr}
2819; HARD-NEXT:    push {r4, lr}
2820; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
2821; HARD-NEXT:    mov r4, r1
2822; HARD-NEXT:    vadd.f64 d1, d17, d17
2823; HARD-NEXT:    vadd.f64 d0, d16, d16
2824; HARD-NEXT:    bl test_v4i32_v2f64_helper
2825; HARD-NEXT:    vrev64.32 q8, q0
2826; HARD-NEXT:    vadd.i32 q8, q8, q8
2827; HARD-NEXT:    vrev64.32 q8, q8
2828; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
2829; HARD-NEXT:    pop {r4, pc}
2830    %1 = load <2 x double>, ptr %p
2831    %2 = fadd <2 x double> %1, %1
2832    %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
2833    %4 = add <4 x i32> %3, %3
2834    store <4 x i32> %4, ptr %q
2835    ret void
2836}
2837
; Big-endian vector call: pass <2 x i64>, receive <4 x i32>. 64-bit elements
; need no lane swap on the argument side; CHECK lines verify the vrev64.32
; swap on the returned vector in both ABIs.
2838declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
2839define void @test_v4i32_v2i64(ptr %p, ptr %q) {
2840; SOFT-LABEL: test_v4i32_v2i64:
2841; SOFT:       @ %bb.0:
2842; SOFT-NEXT:    .save {r4, lr}
2843; SOFT-NEXT:    push {r4, lr}
2844; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
2845; SOFT-NEXT:    mov r4, r1
2846; SOFT-NEXT:    vadd.i64 q8, q8, q8
2847; SOFT-NEXT:    vmov r1, r0, d16
2848; SOFT-NEXT:    vmov r3, r2, d17
2849; SOFT-NEXT:    bl test_v4i32_v2i64_helper
2850; SOFT-NEXT:    vmov d17, r3, r2
2851; SOFT-NEXT:    vmov d16, r1, r0
2852; SOFT-NEXT:    vrev64.32 q8, q8
2853; SOFT-NEXT:    vadd.i32 q8, q8, q8
2854; SOFT-NEXT:    vrev64.32 q8, q8
2855; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
2856; SOFT-NEXT:    pop {r4, pc}
2857;
2858; HARD-LABEL: test_v4i32_v2i64:
2859; HARD:       @ %bb.0:
2860; HARD-NEXT:    .save {r4, lr}
2861; HARD-NEXT:    push {r4, lr}
2862; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
2863; HARD-NEXT:    mov r4, r1
2864; HARD-NEXT:    vadd.i64 q0, q8, q8
2865; HARD-NEXT:    bl test_v4i32_v2i64_helper
2866; HARD-NEXT:    vrev64.32 q8, q0
2867; HARD-NEXT:    vadd.i32 q8, q8, q8
2868; HARD-NEXT:    vrev64.32 q8, q8
2869; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
2870; HARD-NEXT:    pop {r4, pc}
2871    %1 = load <2 x i64>, ptr %p
2872    %2 = add <2 x i64> %1, %1
2873    %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
2874    %4 = add <4 x i32> %3, %3
2875    store <4 x i32> %4, ptr %q
2876    ret void
2877}
2878
; Big-endian vector call: pass <4 x float>, receive <4 x i32>. CHECK lines
; verify the vrev64.32 lane swaps emitted on both sides of the call for the
; soft-float and hard-float ABIs.
2879declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
2880define void @test_v4i32_v4f32(ptr %p, ptr %q) {
2881; SOFT-LABEL: test_v4i32_v4f32:
2882; SOFT:       @ %bb.0:
2883; SOFT-NEXT:    .save {r4, lr}
2884; SOFT-NEXT:    push {r4, lr}
2885; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
2886; SOFT-NEXT:    mov r4, r1
2887; SOFT-NEXT:    vrev64.32 q8, q8
2888; SOFT-NEXT:    vadd.f32 q8, q8, q8
2889; SOFT-NEXT:    vrev64.32 q8, q8
2890; SOFT-NEXT:    vmov r1, r0, d16
2891; SOFT-NEXT:    vmov r3, r2, d17
2892; SOFT-NEXT:    bl test_v4i32_v4f32_helper
2893; SOFT-NEXT:    vmov d17, r3, r2
2894; SOFT-NEXT:    vmov d16, r1, r0
2895; SOFT-NEXT:    vrev64.32 q8, q8
2896; SOFT-NEXT:    vadd.i32 q8, q8, q8
2897; SOFT-NEXT:    vrev64.32 q8, q8
2898; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
2899; SOFT-NEXT:    pop {r4, pc}
2900;
2901; HARD-LABEL: test_v4i32_v4f32:
2902; HARD:       @ %bb.0:
2903; HARD-NEXT:    .save {r4, lr}
2904; HARD-NEXT:    push {r4, lr}
2905; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
2906; HARD-NEXT:    mov r4, r1
2907; HARD-NEXT:    vrev64.32 q8, q8
2908; HARD-NEXT:    vadd.f32 q8, q8, q8
2909; HARD-NEXT:    vrev64.32 q0, q8
2910; HARD-NEXT:    bl test_v4i32_v4f32_helper
2911; HARD-NEXT:    vrev64.32 q8, q0
2912; HARD-NEXT:    vadd.i32 q8, q8, q8
2913; HARD-NEXT:    vrev64.32 q8, q8
2914; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
2915; HARD-NEXT:    pop {r4, pc}
2916    %1 = load <4 x float>, ptr %p
2917    %2 = fadd <4 x float> %1, %1
2918    %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
2919    %4 = add <4 x i32> %3, %3
2920    store <4 x i32> %4, ptr %q
2921    ret void
2922}
2923
; Big-endian vector call: pass <8 x i16>, receive <4 x i32>. CHECK lines
; verify the vrev64.16 swap on the argument and the vrev64.32 swap on the
; returned vector, in both ABIs.
2924declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
2925define void @test_v4i32_v8i16(ptr %p, ptr %q) {
2926; SOFT-LABEL: test_v4i32_v8i16:
2927; SOFT:       @ %bb.0:
2928; SOFT-NEXT:    .save {r4, lr}
2929; SOFT-NEXT:    push {r4, lr}
2930; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
2931; SOFT-NEXT:    mov r4, r1
2932; SOFT-NEXT:    vrev64.16 q8, q8
2933; SOFT-NEXT:    vadd.i16 q8, q8, q8
2934; SOFT-NEXT:    vrev64.16 q8, q8
2935; SOFT-NEXT:    vmov r1, r0, d16
2936; SOFT-NEXT:    vmov r3, r2, d17
2937; SOFT-NEXT:    bl test_v4i32_v8i16_helper
2938; SOFT-NEXT:    vmov d17, r3, r2
2939; SOFT-NEXT:    vmov d16, r1, r0
2940; SOFT-NEXT:    vrev64.32 q8, q8
2941; SOFT-NEXT:    vadd.i32 q8, q8, q8
2942; SOFT-NEXT:    vrev64.32 q8, q8
2943; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
2944; SOFT-NEXT:    pop {r4, pc}
2945;
2946; HARD-LABEL: test_v4i32_v8i16:
2947; HARD:       @ %bb.0:
2948; HARD-NEXT:    .save {r4, lr}
2949; HARD-NEXT:    push {r4, lr}
2950; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
2951; HARD-NEXT:    mov r4, r1
2952; HARD-NEXT:    vrev64.16 q8, q8
2953; HARD-NEXT:    vadd.i16 q8, q8, q8
2954; HARD-NEXT:    vrev64.16 q0, q8
2955; HARD-NEXT:    bl test_v4i32_v8i16_helper
2956; HARD-NEXT:    vrev64.32 q8, q0
2957; HARD-NEXT:    vadd.i32 q8, q8, q8
2958; HARD-NEXT:    vrev64.32 q8, q8
2959; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
2960; HARD-NEXT:    pop {r4, pc}
2961    %1 = load <8 x i16>, ptr %p
2962    %2 = add <8 x i16> %1, %1
2963    %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
2964    %4 = add <4 x i32> %3, %3
2965    store <4 x i32> %4, ptr %q
2966    ret void
2967}
2968
; Big-endian vector call: pass <16 x i8>, receive <4 x i32>. CHECK lines
; verify the vrev64.8 swap on the argument and the vrev64.32 swap on the
; returned vector, in both ABIs.
2969declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
2970define void @test_v4i32_v16i8(ptr %p, ptr %q) {
2971; SOFT-LABEL: test_v4i32_v16i8:
2972; SOFT:       @ %bb.0:
2973; SOFT-NEXT:    .save {r4, lr}
2974; SOFT-NEXT:    push {r4, lr}
2975; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
2976; SOFT-NEXT:    mov r4, r1
2977; SOFT-NEXT:    vrev64.8 q8, q8
2978; SOFT-NEXT:    vadd.i8 q8, q8, q8
2979; SOFT-NEXT:    vrev64.8 q8, q8
2980; SOFT-NEXT:    vmov r1, r0, d16
2981; SOFT-NEXT:    vmov r3, r2, d17
2982; SOFT-NEXT:    bl test_v4i32_v16i8_helper
2983; SOFT-NEXT:    vmov d17, r3, r2
2984; SOFT-NEXT:    vmov d16, r1, r0
2985; SOFT-NEXT:    vrev64.32 q8, q8
2986; SOFT-NEXT:    vadd.i32 q8, q8, q8
2987; SOFT-NEXT:    vrev64.32 q8, q8
2988; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
2989; SOFT-NEXT:    pop {r4, pc}
2990;
2991; HARD-LABEL: test_v4i32_v16i8:
2992; HARD:       @ %bb.0:
2993; HARD-NEXT:    .save {r4, lr}
2994; HARD-NEXT:    push {r4, lr}
2995; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
2996; HARD-NEXT:    mov r4, r1
2997; HARD-NEXT:    vrev64.8 q8, q8
2998; HARD-NEXT:    vadd.i8 q8, q8, q8
2999; HARD-NEXT:    vrev64.8 q0, q8
3000; HARD-NEXT:    bl test_v4i32_v16i8_helper
3001; HARD-NEXT:    vrev64.32 q8, q0
3002; HARD-NEXT:    vadd.i32 q8, q8, q8
3003; HARD-NEXT:    vrev64.32 q8, q8
3004; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3005; HARD-NEXT:    pop {r4, pc}
3006    %1 = load <16 x i8>, ptr %p
3007    %2 = add <16 x i8> %1, %1
3008    %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
3009    %4 = add <4 x i32> %3, %3
3010    store <4 x i32> %4, ptr %q
3011    ret void
3012}
3013
; Big-endian vector call: pass fp128 (added via the __addtf3 libcall, passed
; in core registers / on the stack), receive <8 x i16>. CHECK lines verify
; the vrev64.16 swap applied to the returned vector in both ABIs.
3014declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
3015define void @test_v8i16_f128(ptr %p, ptr %q) {
3016; SOFT-LABEL: test_v8i16_f128:
3017; SOFT:       @ %bb.0:
3018; SOFT-NEXT:    .save {r4, r5, r11, lr}
3019; SOFT-NEXT:    push {r4, r5, r11, lr}
3020; SOFT-NEXT:    .pad #16
3021; SOFT-NEXT:    sub sp, sp, #16
3022; SOFT-NEXT:    ldr r4, [r0]
3023; SOFT-NEXT:    mov r5, r1
3024; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
3025; SOFT-NEXT:    mov r0, r4
3026; SOFT-NEXT:    str r4, [sp]
3027; SOFT-NEXT:    stmib sp, {r1, r2, r3}
3028; SOFT-NEXT:    bl __addtf3
3029; SOFT-NEXT:    bl test_v8i16_f128_helper
3030; SOFT-NEXT:    vmov d17, r3, r2
3031; SOFT-NEXT:    vmov d16, r1, r0
3032; SOFT-NEXT:    vrev64.16 q8, q8
3033; SOFT-NEXT:    vadd.i16 q8, q8, q8
3034; SOFT-NEXT:    vrev64.16 q8, q8
3035; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
3036; SOFT-NEXT:    add sp, sp, #16
3037; SOFT-NEXT:    pop {r4, r5, r11, pc}
3038;
3039; HARD-LABEL: test_v8i16_f128:
3040; HARD:       @ %bb.0:
3041; HARD-NEXT:    .save {r4, r5, r11, lr}
3042; HARD-NEXT:    push {r4, r5, r11, lr}
3043; HARD-NEXT:    .pad #16
3044; HARD-NEXT:    sub sp, sp, #16
3045; HARD-NEXT:    ldr r4, [r0]
3046; HARD-NEXT:    mov r5, r1
3047; HARD-NEXT:    ldmib r0, {r1, r2, r3}
3048; HARD-NEXT:    mov r0, r4
3049; HARD-NEXT:    str r4, [sp]
3050; HARD-NEXT:    stmib sp, {r1, r2, r3}
3051; HARD-NEXT:    bl __addtf3
3052; HARD-NEXT:    bl test_v8i16_f128_helper
3053; HARD-NEXT:    vrev64.16 q8, q0
3054; HARD-NEXT:    vadd.i16 q8, q8, q8
3055; HARD-NEXT:    vrev64.16 q8, q8
3056; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
3057; HARD-NEXT:    add sp, sp, #16
3058; HARD-NEXT:    pop {r4, r5, r11, pc}
3059    %1 = load fp128, ptr %p
3060    %2 = fadd fp128 %1, %1
3061    %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
3062    %4 = add <8 x i16> %3, %3
3063    store <8 x i16> %4, ptr %q
3064    ret void
3065}
3066
; Big-endian vector call: pass <2 x double>, receive <8 x i16>. 64-bit
; elements need no lane swap on the argument side; CHECK lines verify the
; vrev64.16 swap on the returned vector in both ABIs.
3067declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
3068define void @test_v8i16_v2f64(ptr %p, ptr %q) {
3069; SOFT-LABEL: test_v8i16_v2f64:
3070; SOFT:       @ %bb.0:
3071; SOFT-NEXT:    .save {r4, lr}
3072; SOFT-NEXT:    push {r4, lr}
3073; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3074; SOFT-NEXT:    mov r4, r1
3075; SOFT-NEXT:    vadd.f64 d18, d16, d16
3076; SOFT-NEXT:    vadd.f64 d16, d17, d17
3077; SOFT-NEXT:    vmov r1, r0, d18
3078; SOFT-NEXT:    vmov r3, r2, d16
3079; SOFT-NEXT:    bl test_v8i16_v2f64_helper
3080; SOFT-NEXT:    vmov d17, r3, r2
3081; SOFT-NEXT:    vmov d16, r1, r0
3082; SOFT-NEXT:    vrev64.16 q8, q8
3083; SOFT-NEXT:    vadd.i16 q8, q8, q8
3084; SOFT-NEXT:    vrev64.16 q8, q8
3085; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3086; SOFT-NEXT:    pop {r4, pc}
3087;
3088; HARD-LABEL: test_v8i16_v2f64:
3089; HARD:       @ %bb.0:
3090; HARD-NEXT:    .save {r4, lr}
3091; HARD-NEXT:    push {r4, lr}
3092; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3093; HARD-NEXT:    mov r4, r1
3094; HARD-NEXT:    vadd.f64 d1, d17, d17
3095; HARD-NEXT:    vadd.f64 d0, d16, d16
3096; HARD-NEXT:    bl test_v8i16_v2f64_helper
3097; HARD-NEXT:    vrev64.16 q8, q0
3098; HARD-NEXT:    vadd.i16 q8, q8, q8
3099; HARD-NEXT:    vrev64.16 q8, q8
3100; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3101; HARD-NEXT:    pop {r4, pc}
3102    %1 = load <2 x double>, ptr %p
3103    %2 = fadd <2 x double> %1, %1
3104    %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
3105    %4 = add <8 x i16> %3, %3
3106    store <8 x i16> %4, ptr %q
3107    ret void
3108}
3109
; Big-endian vector call: pass <2 x i64>, receive <8 x i16>. 64-bit elements
; need no lane swap on the argument side; CHECK lines verify the vrev64.16
; swap on the returned vector in both ABIs.
3110declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
3111define void @test_v8i16_v2i64(ptr %p, ptr %q) {
3112; SOFT-LABEL: test_v8i16_v2i64:
3113; SOFT:       @ %bb.0:
3114; SOFT-NEXT:    .save {r4, lr}
3115; SOFT-NEXT:    push {r4, lr}
3116; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3117; SOFT-NEXT:    mov r4, r1
3118; SOFT-NEXT:    vadd.i64 q8, q8, q8
3119; SOFT-NEXT:    vmov r1, r0, d16
3120; SOFT-NEXT:    vmov r3, r2, d17
3121; SOFT-NEXT:    bl test_v8i16_v2i64_helper
3122; SOFT-NEXT:    vmov d17, r3, r2
3123; SOFT-NEXT:    vmov d16, r1, r0
3124; SOFT-NEXT:    vrev64.16 q8, q8
3125; SOFT-NEXT:    vadd.i16 q8, q8, q8
3126; SOFT-NEXT:    vrev64.16 q8, q8
3127; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3128; SOFT-NEXT:    pop {r4, pc}
3129;
3130; HARD-LABEL: test_v8i16_v2i64:
3131; HARD:       @ %bb.0:
3132; HARD-NEXT:    .save {r4, lr}
3133; HARD-NEXT:    push {r4, lr}
3134; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3135; HARD-NEXT:    mov r4, r1
3136; HARD-NEXT:    vadd.i64 q0, q8, q8
3137; HARD-NEXT:    bl test_v8i16_v2i64_helper
3138; HARD-NEXT:    vrev64.16 q8, q0
3139; HARD-NEXT:    vadd.i16 q8, q8, q8
3140; HARD-NEXT:    vrev64.16 q8, q8
3141; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3142; HARD-NEXT:    pop {r4, pc}
3143    %1 = load <2 x i64>, ptr %p
3144    %2 = add <2 x i64> %1, %1
3145    %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
3146    %4 = add <8 x i16> %3, %3
3147    store <8 x i16> %4, ptr %q
3148    ret void
3149}
3150
; Big-endian vector call: pass <4 x float>, receive <8 x i16>. CHECK lines
; verify the vrev64.32 swap on the argument and the vrev64.16 swap on the
; returned vector, in both ABIs.
3151declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
3152define void @test_v8i16_v4f32(ptr %p, ptr %q) {
3153; SOFT-LABEL: test_v8i16_v4f32:
3154; SOFT:       @ %bb.0:
3155; SOFT-NEXT:    .save {r4, lr}
3156; SOFT-NEXT:    push {r4, lr}
3157; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3158; SOFT-NEXT:    mov r4, r1
3159; SOFT-NEXT:    vrev64.32 q8, q8
3160; SOFT-NEXT:    vadd.f32 q8, q8, q8
3161; SOFT-NEXT:    vrev64.32 q8, q8
3162; SOFT-NEXT:    vmov r1, r0, d16
3163; SOFT-NEXT:    vmov r3, r2, d17
3164; SOFT-NEXT:    bl test_v8i16_v4f32_helper
3165; SOFT-NEXT:    vmov d17, r3, r2
3166; SOFT-NEXT:    vmov d16, r1, r0
3167; SOFT-NEXT:    vrev64.16 q8, q8
3168; SOFT-NEXT:    vadd.i16 q8, q8, q8
3169; SOFT-NEXT:    vrev64.16 q8, q8
3170; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3171; SOFT-NEXT:    pop {r4, pc}
3172;
3173; HARD-LABEL: test_v8i16_v4f32:
3174; HARD:       @ %bb.0:
3175; HARD-NEXT:    .save {r4, lr}
3176; HARD-NEXT:    push {r4, lr}
3177; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3178; HARD-NEXT:    mov r4, r1
3179; HARD-NEXT:    vrev64.32 q8, q8
3180; HARD-NEXT:    vadd.f32 q8, q8, q8
3181; HARD-NEXT:    vrev64.32 q0, q8
3182; HARD-NEXT:    bl test_v8i16_v4f32_helper
3183; HARD-NEXT:    vrev64.16 q8, q0
3184; HARD-NEXT:    vadd.i16 q8, q8, q8
3185; HARD-NEXT:    vrev64.16 q8, q8
3186; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3187; HARD-NEXT:    pop {r4, pc}
3188    %1 = load <4 x float>, ptr %p
3189    %2 = fadd <4 x float> %1, %1
3190    %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
3191    %4 = add <8 x i16> %3, %3
3192    store <8 x i16> %4, ptr %q
3193    ret void
3194}
3195
; Big-endian vector call: pass <4 x i32>, receive <8 x i16>. CHECK lines
; verify the vrev64.32 swap on the argument and the vrev64.16 swap on the
; returned vector, in both ABIs.
3196declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
3197define void @test_v8i16_v4i32(ptr %p, ptr %q) {
3198; SOFT-LABEL: test_v8i16_v4i32:
3199; SOFT:       @ %bb.0:
3200; SOFT-NEXT:    .save {r4, lr}
3201; SOFT-NEXT:    push {r4, lr}
3202; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3203; SOFT-NEXT:    mov r4, r1
3204; SOFT-NEXT:    vrev64.32 q8, q8
3205; SOFT-NEXT:    vadd.i32 q8, q8, q8
3206; SOFT-NEXT:    vrev64.32 q8, q8
3207; SOFT-NEXT:    vmov r1, r0, d16
3208; SOFT-NEXT:    vmov r3, r2, d17
3209; SOFT-NEXT:    bl test_v8i16_v4i32_helper
3210; SOFT-NEXT:    vmov d17, r3, r2
3211; SOFT-NEXT:    vmov d16, r1, r0
3212; SOFT-NEXT:    vrev64.16 q8, q8
3213; SOFT-NEXT:    vadd.i16 q8, q8, q8
3214; SOFT-NEXT:    vrev64.16 q8, q8
3215; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3216; SOFT-NEXT:    pop {r4, pc}
3217;
3218; HARD-LABEL: test_v8i16_v4i32:
3219; HARD:       @ %bb.0:
3220; HARD-NEXT:    .save {r4, lr}
3221; HARD-NEXT:    push {r4, lr}
3222; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3223; HARD-NEXT:    mov r4, r1
3224; HARD-NEXT:    vrev64.32 q8, q8
3225; HARD-NEXT:    vadd.i32 q8, q8, q8
3226; HARD-NEXT:    vrev64.32 q0, q8
3227; HARD-NEXT:    bl test_v8i16_v4i32_helper
3228; HARD-NEXT:    vrev64.16 q8, q0
3229; HARD-NEXT:    vadd.i16 q8, q8, q8
3230; HARD-NEXT:    vrev64.16 q8, q8
3231; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3232; HARD-NEXT:    pop {r4, pc}
3233    %1 = load <4 x i32>, ptr %p
3234    %2 = add <4 x i32> %1, %1
3235    %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
3236    %4 = add <8 x i16> %3, %3
3237    store <8 x i16> %4, ptr %q
3238    ret void
3239}
3240
; Big-endian vector call: pass <16 x i8>, receive <8 x i16>. CHECK lines
; verify the vrev64.8 swap on the argument and the vrev64.16 swap on the
; returned vector, in both ABIs.
3241declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
3242define void @test_v8i16_v16i8(ptr %p, ptr %q) {
3243; SOFT-LABEL: test_v8i16_v16i8:
3244; SOFT:       @ %bb.0:
3245; SOFT-NEXT:    .save {r4, lr}
3246; SOFT-NEXT:    push {r4, lr}
3247; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3248; SOFT-NEXT:    mov r4, r1
3249; SOFT-NEXT:    vrev64.8 q8, q8
3250; SOFT-NEXT:    vadd.i8 q8, q8, q8
3251; SOFT-NEXT:    vrev64.8 q8, q8
3252; SOFT-NEXT:    vmov r1, r0, d16
3253; SOFT-NEXT:    vmov r3, r2, d17
3254; SOFT-NEXT:    bl test_v8i16_v16i8_helper
3255; SOFT-NEXT:    vmov d17, r3, r2
3256; SOFT-NEXT:    vmov d16, r1, r0
3257; SOFT-NEXT:    vrev64.16 q8, q8
3258; SOFT-NEXT:    vadd.i16 q8, q8, q8
3259; SOFT-NEXT:    vrev64.16 q8, q8
3260; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3261; SOFT-NEXT:    pop {r4, pc}
3262;
3263; HARD-LABEL: test_v8i16_v16i8:
3264; HARD:       @ %bb.0:
3265; HARD-NEXT:    .save {r4, lr}
3266; HARD-NEXT:    push {r4, lr}
3267; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3268; HARD-NEXT:    mov r4, r1
3269; HARD-NEXT:    vrev64.8 q8, q8
3270; HARD-NEXT:    vadd.i8 q8, q8, q8
3271; HARD-NEXT:    vrev64.8 q0, q8
3272; HARD-NEXT:    bl test_v8i16_v16i8_helper
3273; HARD-NEXT:    vrev64.16 q8, q0
3274; HARD-NEXT:    vadd.i16 q8, q8, q8
3275; HARD-NEXT:    vrev64.16 q8, q8
3276; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3277; HARD-NEXT:    pop {r4, pc}
3278    %1 = load <16 x i8>, ptr %p
3279    %2 = add <16 x i8> %1, %1
3280    %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
3281    %4 = add <8 x i16> %3, %3
3282    store <8 x i16> %4, ptr %q
3283    ret void
3284}
3285
; Big-endian vector call: pass fp128 (added via the __addtf3 libcall, passed
; in core registers / on the stack), receive <16 x i8>. CHECK lines verify
; the vrev64.8 swap applied to the returned vector in both ABIs.
3286declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
3287define void @test_v16i8_f128(ptr %p, ptr %q) {
3288; SOFT-LABEL: test_v16i8_f128:
3289; SOFT:       @ %bb.0:
3290; SOFT-NEXT:    .save {r4, r5, r11, lr}
3291; SOFT-NEXT:    push {r4, r5, r11, lr}
3292; SOFT-NEXT:    .pad #16
3293; SOFT-NEXT:    sub sp, sp, #16
3294; SOFT-NEXT:    ldr r4, [r0]
3295; SOFT-NEXT:    mov r5, r1
3296; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
3297; SOFT-NEXT:    mov r0, r4
3298; SOFT-NEXT:    str r4, [sp]
3299; SOFT-NEXT:    stmib sp, {r1, r2, r3}
3300; SOFT-NEXT:    bl __addtf3
3301; SOFT-NEXT:    bl test_v16i8_f128_helper
3302; SOFT-NEXT:    vmov d17, r3, r2
3303; SOFT-NEXT:    vmov d16, r1, r0
3304; SOFT-NEXT:    vrev64.8 q8, q8
3305; SOFT-NEXT:    vadd.i8 q8, q8, q8
3306; SOFT-NEXT:    vrev64.8 q8, q8
3307; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
3308; SOFT-NEXT:    add sp, sp, #16
3309; SOFT-NEXT:    pop {r4, r5, r11, pc}
3310;
3311; HARD-LABEL: test_v16i8_f128:
3312; HARD:       @ %bb.0:
3313; HARD-NEXT:    .save {r4, r5, r11, lr}
3314; HARD-NEXT:    push {r4, r5, r11, lr}
3315; HARD-NEXT:    .pad #16
3316; HARD-NEXT:    sub sp, sp, #16
3317; HARD-NEXT:    ldr r4, [r0]
3318; HARD-NEXT:    mov r5, r1
3319; HARD-NEXT:    ldmib r0, {r1, r2, r3}
3320; HARD-NEXT:    mov r0, r4
3321; HARD-NEXT:    str r4, [sp]
3322; HARD-NEXT:    stmib sp, {r1, r2, r3}
3323; HARD-NEXT:    bl __addtf3
3324; HARD-NEXT:    bl test_v16i8_f128_helper
3325; HARD-NEXT:    vrev64.8 q8, q0
3326; HARD-NEXT:    vadd.i8 q8, q8, q8
3327; HARD-NEXT:    vrev64.8 q8, q8
3328; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
3329; HARD-NEXT:    add sp, sp, #16
3330; HARD-NEXT:    pop {r4, r5, r11, pc}
3331    %1 = load fp128, ptr %p
3332    %2 = fadd fp128 %1, %1
3333    %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
3334    %4 = add <16 x i8> %3, %3
3335    store <16 x i8> %4, ptr %q
3336    ret void
3337}
3338
; Big-endian vector call: pass <2 x double>, receive <16 x i8>. 64-bit
; elements need no lane swap on the argument side; CHECK lines verify the
; vrev64.8 swap on the returned vector in both ABIs.
3339declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
3340define void @test_v16i8_v2f64(ptr %p, ptr %q) {
3341; SOFT-LABEL: test_v16i8_v2f64:
3342; SOFT:       @ %bb.0:
3343; SOFT-NEXT:    .save {r4, lr}
3344; SOFT-NEXT:    push {r4, lr}
3345; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3346; SOFT-NEXT:    mov r4, r1
3347; SOFT-NEXT:    vadd.f64 d18, d16, d16
3348; SOFT-NEXT:    vadd.f64 d16, d17, d17
3349; SOFT-NEXT:    vmov r1, r0, d18
3350; SOFT-NEXT:    vmov r3, r2, d16
3351; SOFT-NEXT:    bl test_v16i8_v2f64_helper
3352; SOFT-NEXT:    vmov d17, r3, r2
3353; SOFT-NEXT:    vmov d16, r1, r0
3354; SOFT-NEXT:    vrev64.8 q8, q8
3355; SOFT-NEXT:    vadd.i8 q8, q8, q8
3356; SOFT-NEXT:    vrev64.8 q8, q8
3357; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3358; SOFT-NEXT:    pop {r4, pc}
3359;
3360; HARD-LABEL: test_v16i8_v2f64:
3361; HARD:       @ %bb.0:
3362; HARD-NEXT:    .save {r4, lr}
3363; HARD-NEXT:    push {r4, lr}
3364; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3365; HARD-NEXT:    mov r4, r1
3366; HARD-NEXT:    vadd.f64 d1, d17, d17
3367; HARD-NEXT:    vadd.f64 d0, d16, d16
3368; HARD-NEXT:    bl test_v16i8_v2f64_helper
3369; HARD-NEXT:    vrev64.8 q8, q0
3370; HARD-NEXT:    vadd.i8 q8, q8, q8
3371; HARD-NEXT:    vrev64.8 q8, q8
3372; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3373; HARD-NEXT:    pop {r4, pc}
3374    %1 = load <2 x double>, ptr %p
3375    %2 = fadd <2 x double> %1, %1
3376    %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
3377    %4 = add <16 x i8> %3, %3
3378    store <16 x i8> %4, ptr %q
3379    ret void
3380}
3381
; Big-endian vector call: pass <2 x i64>, receive <16 x i8>. 64-bit elements
; need no lane swap on the argument side; CHECK lines verify the vrev64.8
; swap on the returned vector in both ABIs.
3382declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
3383define void @test_v16i8_v2i64(ptr %p, ptr %q) {
3384; SOFT-LABEL: test_v16i8_v2i64:
3385; SOFT:       @ %bb.0:
3386; SOFT-NEXT:    .save {r4, lr}
3387; SOFT-NEXT:    push {r4, lr}
3388; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3389; SOFT-NEXT:    mov r4, r1
3390; SOFT-NEXT:    vadd.i64 q8, q8, q8
3391; SOFT-NEXT:    vmov r1, r0, d16
3392; SOFT-NEXT:    vmov r3, r2, d17
3393; SOFT-NEXT:    bl test_v16i8_v2i64_helper
3394; SOFT-NEXT:    vmov d17, r3, r2
3395; SOFT-NEXT:    vmov d16, r1, r0
3396; SOFT-NEXT:    vrev64.8 q8, q8
3397; SOFT-NEXT:    vadd.i8 q8, q8, q8
3398; SOFT-NEXT:    vrev64.8 q8, q8
3399; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3400; SOFT-NEXT:    pop {r4, pc}
3401;
3402; HARD-LABEL: test_v16i8_v2i64:
3403; HARD:       @ %bb.0:
3404; HARD-NEXT:    .save {r4, lr}
3405; HARD-NEXT:    push {r4, lr}
3406; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3407; HARD-NEXT:    mov r4, r1
3408; HARD-NEXT:    vadd.i64 q0, q8, q8
3409; HARD-NEXT:    bl test_v16i8_v2i64_helper
3410; HARD-NEXT:    vrev64.8 q8, q0
3411; HARD-NEXT:    vadd.i8 q8, q8, q8
3412; HARD-NEXT:    vrev64.8 q8, q8
3413; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3414; HARD-NEXT:    pop {r4, pc}
3415    %1 = load <2 x i64>, ptr %p
3416    %2 = add <2 x i64> %1, %1
3417    %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
3418    %4 = add <16 x i8> %3, %3
3419    store <16 x i8> %4, ptr %q
3420    ret void
3421}
3422
; Big-endian vector call: pass <4 x float>, receive <16 x i8>. CHECK lines
; verify the vrev64.32 swap on the argument and the vrev64.8 swap on the
; returned vector, in both ABIs.
3423declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
3424define void @test_v16i8_v4f32(ptr %p, ptr %q) {
3425; SOFT-LABEL: test_v16i8_v4f32:
3426; SOFT:       @ %bb.0:
3427; SOFT-NEXT:    .save {r4, lr}
3428; SOFT-NEXT:    push {r4, lr}
3429; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3430; SOFT-NEXT:    mov r4, r1
3431; SOFT-NEXT:    vrev64.32 q8, q8
3432; SOFT-NEXT:    vadd.f32 q8, q8, q8
3433; SOFT-NEXT:    vrev64.32 q8, q8
3434; SOFT-NEXT:    vmov r1, r0, d16
3435; SOFT-NEXT:    vmov r3, r2, d17
3436; SOFT-NEXT:    bl test_v16i8_v4f32_helper
3437; SOFT-NEXT:    vmov d17, r3, r2
3438; SOFT-NEXT:    vmov d16, r1, r0
3439; SOFT-NEXT:    vrev64.8 q8, q8
3440; SOFT-NEXT:    vadd.i8 q8, q8, q8
3441; SOFT-NEXT:    vrev64.8 q8, q8
3442; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3443; SOFT-NEXT:    pop {r4, pc}
3444;
3445; HARD-LABEL: test_v16i8_v4f32:
3446; HARD:       @ %bb.0:
3447; HARD-NEXT:    .save {r4, lr}
3448; HARD-NEXT:    push {r4, lr}
3449; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3450; HARD-NEXT:    mov r4, r1
3451; HARD-NEXT:    vrev64.32 q8, q8
3452; HARD-NEXT:    vadd.f32 q8, q8, q8
3453; HARD-NEXT:    vrev64.32 q0, q8
3454; HARD-NEXT:    bl test_v16i8_v4f32_helper
3455; HARD-NEXT:    vrev64.8 q8, q0
3456; HARD-NEXT:    vadd.i8 q8, q8, q8
3457; HARD-NEXT:    vrev64.8 q8, q8
3458; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3459; HARD-NEXT:    pop {r4, pc}
3460    %1 = load <4 x float>, ptr %p
3461    %2 = fadd <4 x float> %1, %1
3462    %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
3463    %4 = add <16 x i8> %3, %3
3464    store <16 x i8> %4, ptr %q
3465    ret void
3466}
3467
; Big-endian vector call: pass <4 x i32>, receive <16 x i8>. CHECK lines
; verify the vrev64.32 swap on the argument and the vrev64.8 swap on the
; returned vector, in both ABIs.
3468declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
3469define void @test_v16i8_v4i32(ptr %p, ptr %q) {
3470; SOFT-LABEL: test_v16i8_v4i32:
3471; SOFT:       @ %bb.0:
3472; SOFT-NEXT:    .save {r4, lr}
3473; SOFT-NEXT:    push {r4, lr}
3474; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3475; SOFT-NEXT:    mov r4, r1
3476; SOFT-NEXT:    vrev64.32 q8, q8
3477; SOFT-NEXT:    vadd.i32 q8, q8, q8
3478; SOFT-NEXT:    vrev64.32 q8, q8
3479; SOFT-NEXT:    vmov r1, r0, d16
3480; SOFT-NEXT:    vmov r3, r2, d17
3481; SOFT-NEXT:    bl test_v16i8_v4i32_helper
3482; SOFT-NEXT:    vmov d17, r3, r2
3483; SOFT-NEXT:    vmov d16, r1, r0
3484; SOFT-NEXT:    vrev64.8 q8, q8
3485; SOFT-NEXT:    vadd.i8 q8, q8, q8
3486; SOFT-NEXT:    vrev64.8 q8, q8
3487; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3488; SOFT-NEXT:    pop {r4, pc}
3489;
3490; HARD-LABEL: test_v16i8_v4i32:
3491; HARD:       @ %bb.0:
3492; HARD-NEXT:    .save {r4, lr}
3493; HARD-NEXT:    push {r4, lr}
3494; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3495; HARD-NEXT:    mov r4, r1
3496; HARD-NEXT:    vrev64.32 q8, q8
3497; HARD-NEXT:    vadd.i32 q8, q8, q8
3498; HARD-NEXT:    vrev64.32 q0, q8
3499; HARD-NEXT:    bl test_v16i8_v4i32_helper
3500; HARD-NEXT:    vrev64.8 q8, q0
3501; HARD-NEXT:    vadd.i8 q8, q8, q8
3502; HARD-NEXT:    vrev64.8 q8, q8
3503; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3504; HARD-NEXT:    pop {r4, pc}
3505    %1 = load <4 x i32>, ptr %p
3506    %2 = add <4 x i32> %1, %1
3507    %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
3508    %4 = add <16 x i8> %3, %3
3509    store <16 x i8> %4, ptr %q
3510    ret void
3511}
3512
; Big-endian vector call: pass <8 x i16>, receive <16 x i8>. CHECK lines
; verify the vrev64.16 swap on the argument and the vrev64.8 swap on the
; returned vector, in both ABIs.
3513declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
3514define void @test_v16i8_v8i16(ptr %p, ptr %q) {
3515; SOFT-LABEL: test_v16i8_v8i16:
3516; SOFT:       @ %bb.0:
3517; SOFT-NEXT:    .save {r4, lr}
3518; SOFT-NEXT:    push {r4, lr}
3519; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
3520; SOFT-NEXT:    mov r4, r1
3521; SOFT-NEXT:    vrev64.16 q8, q8
3522; SOFT-NEXT:    vadd.i16 q8, q8, q8
3523; SOFT-NEXT:    vrev64.16 q8, q8
3524; SOFT-NEXT:    vmov r1, r0, d16
3525; SOFT-NEXT:    vmov r3, r2, d17
3526; SOFT-NEXT:    bl test_v16i8_v8i16_helper
3527; SOFT-NEXT:    vmov d17, r3, r2
3528; SOFT-NEXT:    vmov d16, r1, r0
3529; SOFT-NEXT:    vrev64.8 q8, q8
3530; SOFT-NEXT:    vadd.i8 q8, q8, q8
3531; SOFT-NEXT:    vrev64.8 q8, q8
3532; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
3533; SOFT-NEXT:    pop {r4, pc}
3534;
3535; HARD-LABEL: test_v16i8_v8i16:
3536; HARD:       @ %bb.0:
3537; HARD-NEXT:    .save {r4, lr}
3538; HARD-NEXT:    push {r4, lr}
3539; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
3540; HARD-NEXT:    mov r4, r1
3541; HARD-NEXT:    vrev64.16 q8, q8
3542; HARD-NEXT:    vadd.i16 q8, q8, q8
3543; HARD-NEXT:    vrev64.16 q0, q8
3544; HARD-NEXT:    bl test_v16i8_v8i16_helper
3545; HARD-NEXT:    vrev64.8 q8, q0
3546; HARD-NEXT:    vadd.i8 q8, q8, q8
3547; HARD-NEXT:    vrev64.8 q8, q8
3548; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
3549; HARD-NEXT:    pop {r4, pc}
3550    %1 = load <8 x i16>, ptr %p
3551    %2 = add <8 x i16> %1, %1
3552    %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
3553    %4 = add <16 x i8> %3, %3
3554    store <16 x i8> %4, ptr %q
3555    ret void
3556}
3557