xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll (revision 8e0cd7382adacd8bc1741dc26bc0be6bdf8e238a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6
7target triple = "aarch64-unknown-linux-gnu"
8
; Element-wise add of two <4 x i8> vectors loaded from %a and %b; the sum is
; stored back to %a.
; The SVE/SME runs (CHECK prefix) expect the bytes to be loaded into halfword
; lanes by ld1b under a "ptrue p0.h, vl4" predicate and written back by st1b
; using the same predicate. The run using the NONEON-NOSVE prefix expects only
; scalar ldrb/add/strb instructions operating directly on [x0]/[x1].
9define void @add_v4i8(ptr %a, ptr %b) {
10; CHECK-LABEL: add_v4i8:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    ptrue p0.h, vl4
13; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
14; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x1]
15; CHECK-NEXT:    add z0.h, z0.h, z1.h
16; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
17; CHECK-NEXT:    ret
18;
19; NONEON-NOSVE-LABEL: add_v4i8:
20; NONEON-NOSVE:       // %bb.0:
21; NONEON-NOSVE-NEXT:    ldrb w8, [x0, #3]
22; NONEON-NOSVE-NEXT:    ldrb w9, [x1, #3]
23; NONEON-NOSVE-NEXT:    ldrb w10, [x0, #2]
24; NONEON-NOSVE-NEXT:    ldrb w11, [x0, #1]
25; NONEON-NOSVE-NEXT:    ldrb w12, [x1, #2]
26; NONEON-NOSVE-NEXT:    ldrb w13, [x0]
27; NONEON-NOSVE-NEXT:    add w8, w8, w9
28; NONEON-NOSVE-NEXT:    ldrb w14, [x1, #1]
29; NONEON-NOSVE-NEXT:    ldrb w9, [x1]
30; NONEON-NOSVE-NEXT:    add w10, w10, w12
31; NONEON-NOSVE-NEXT:    strb w8, [x0, #3]
32; NONEON-NOSVE-NEXT:    add w8, w11, w14
33; NONEON-NOSVE-NEXT:    add w9, w13, w9
34; NONEON-NOSVE-NEXT:    strb w10, [x0, #2]
35; NONEON-NOSVE-NEXT:    strb w8, [x0, #1]
36; NONEON-NOSVE-NEXT:    strb w9, [x0]
37; NONEON-NOSVE-NEXT:    ret
38  %op1 = load <4 x i8>, ptr %a
39  %op2 = load <4 x i8>, ptr %b
40  %res = add <4 x i8> %op1, %op2
41  store <4 x i8> %res, ptr %a
42  ret void
43}
44
; Element-wise add of two <8 x i8> vectors loaded from %a and %b; the sum is
; stored back to %a.
; The SVE/SME runs (CHECK prefix) expect plain "ldr d"/"str d" accesses and an
; unpredicated "add z0.b", so no ptrue appears in the expected output.
; The run using the NONEON-NOSVE prefix expects both operands to be copied into
; a 32-byte stack frame, added one byte at a time with ldrb/add/strb, and the
; assembled result reloaded from [sp, #24] before the final store.
45define void @add_v8i8(ptr %a, ptr %b) {
46; CHECK-LABEL: add_v8i8:
47; CHECK:       // %bb.0:
48; CHECK-NEXT:    ldr d0, [x0]
49; CHECK-NEXT:    ldr d1, [x1]
50; CHECK-NEXT:    add z0.b, z0.b, z1.b
51; CHECK-NEXT:    str d0, [x0]
52; CHECK-NEXT:    ret
53;
54; NONEON-NOSVE-LABEL: add_v8i8:
55; NONEON-NOSVE:       // %bb.0:
56; NONEON-NOSVE-NEXT:    sub sp, sp, #32
57; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
58; NONEON-NOSVE-NEXT:    ldr d0, [x1]
59; NONEON-NOSVE-NEXT:    ldr d1, [x0]
60; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #8]
61; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
62; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
63; NONEON-NOSVE-NEXT:    add w8, w9, w8
64; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
65; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
66; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
67; NONEON-NOSVE-NEXT:    add w8, w9, w8
68; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
69; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
70; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
71; NONEON-NOSVE-NEXT:    add w8, w9, w8
72; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
73; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
74; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
75; NONEON-NOSVE-NEXT:    add w8, w9, w8
76; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
77; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
78; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
79; NONEON-NOSVE-NEXT:    add w8, w9, w8
80; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
81; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
82; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
83; NONEON-NOSVE-NEXT:    add w8, w9, w8
84; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
85; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
86; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
87; NONEON-NOSVE-NEXT:    add w8, w9, w8
88; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
89; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
90; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
91; NONEON-NOSVE-NEXT:    add w8, w9, w8
92; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
93; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
94; NONEON-NOSVE-NEXT:    str d0, [x0]
95; NONEON-NOSVE-NEXT:    add sp, sp, #32
96; NONEON-NOSVE-NEXT:    ret
97  %op1 = load <8 x i8>, ptr %a
98  %op2 = load <8 x i8>, ptr %b
99  %res = add <8 x i8> %op1, %op2
100  store <8 x i8> %res, ptr %a
101  ret void
102}
103
; Element-wise add of two <16 x i8> vectors loaded from %a and %b; the sum is
; stored back to %a.
; The SVE/SME runs (CHECK prefix) expect plain "ldr q"/"str q" accesses and an
; unpredicated "add z0.b", so no ptrue appears in the expected output.
; The run using the NONEON-NOSVE prefix expects a 48-byte frame created by the
; pre-indexed stp of both operands, a byte-by-byte ldrb/add/strb sequence, and
; the result reloaded from [sp, #32] before the final store.
104define void @add_v16i8(ptr %a, ptr %b) {
105; CHECK-LABEL: add_v16i8:
106; CHECK:       // %bb.0:
107; CHECK-NEXT:    ldr q0, [x0]
108; CHECK-NEXT:    ldr q1, [x1]
109; CHECK-NEXT:    add z0.b, z0.b, z1.b
110; CHECK-NEXT:    str q0, [x0]
111; CHECK-NEXT:    ret
112;
113; NONEON-NOSVE-LABEL: add_v16i8:
114; NONEON-NOSVE:       // %bb.0:
115; NONEON-NOSVE-NEXT:    ldr q0, [x1]
116; NONEON-NOSVE-NEXT:    ldr q1, [x0]
117; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
118; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
119; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
120; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
121; NONEON-NOSVE-NEXT:    add w8, w9, w8
122; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
123; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
124; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
125; NONEON-NOSVE-NEXT:    add w8, w9, w8
126; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
127; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
128; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
129; NONEON-NOSVE-NEXT:    add w8, w9, w8
130; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
131; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
132; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
133; NONEON-NOSVE-NEXT:    add w8, w9, w8
134; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
135; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
136; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
137; NONEON-NOSVE-NEXT:    add w8, w9, w8
138; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
139; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
140; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
141; NONEON-NOSVE-NEXT:    add w8, w9, w8
142; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
143; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
144; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
145; NONEON-NOSVE-NEXT:    add w8, w9, w8
146; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
147; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
148; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
149; NONEON-NOSVE-NEXT:    add w8, w9, w8
150; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
151; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
152; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
153; NONEON-NOSVE-NEXT:    add w8, w9, w8
154; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
155; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
156; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
157; NONEON-NOSVE-NEXT:    add w8, w9, w8
158; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
159; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
160; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
161; NONEON-NOSVE-NEXT:    add w8, w9, w8
162; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
163; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
164; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
165; NONEON-NOSVE-NEXT:    add w8, w9, w8
166; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
167; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
168; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
169; NONEON-NOSVE-NEXT:    add w8, w9, w8
170; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
171; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
172; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
173; NONEON-NOSVE-NEXT:    add w8, w9, w8
174; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
175; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
176; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
177; NONEON-NOSVE-NEXT:    add w8, w9, w8
178; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
179; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
180; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
181; NONEON-NOSVE-NEXT:    add w8, w9, w8
182; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
183; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
184; NONEON-NOSVE-NEXT:    str q0, [x0]
185; NONEON-NOSVE-NEXT:    add sp, sp, #48
186; NONEON-NOSVE-NEXT:    ret
187  %op1 = load <16 x i8>, ptr %a
188  %op2 = load <16 x i8>, ptr %b
189  %res = add <16 x i8> %op1, %op2
190  store <16 x i8> %res, ptr %a
191  ret void
192}
193
; Element-wise add of two <32 x i8> vectors loaded from %a and %b; the sum is
; stored back to %a.
; The SVE/SME runs (CHECK prefix) expect the 256-bit operands to be handled as
; two Q-register halves: ldp/stp pairs with two unpredicated "add z.b"
; instructions and no ptrue.
; The run using the NONEON-NOSVE prefix expects a 96-byte stack frame holding
; both operands, a byte-by-byte ldrb/add/strb sequence, and the result reloaded
; from [sp, #64] with ldp before the final stp to [x0].
194define void @add_v32i8(ptr %a, ptr %b) {
195; CHECK-LABEL: add_v32i8:
196; CHECK:       // %bb.0:
197; CHECK-NEXT:    ldp q0, q3, [x1]
198; CHECK-NEXT:    ldp q1, q2, [x0]
199; CHECK-NEXT:    add z0.b, z1.b, z0.b
200; CHECK-NEXT:    add z1.b, z2.b, z3.b
201; CHECK-NEXT:    stp q0, q1, [x0]
202; CHECK-NEXT:    ret
203;
204; NONEON-NOSVE-LABEL: add_v32i8:
205; NONEON-NOSVE:       // %bb.0:
206; NONEON-NOSVE-NEXT:    sub sp, sp, #96
207; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
208; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
209; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
210; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
211; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
212; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
213; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
214; NONEON-NOSVE-NEXT:    add w8, w9, w8
215; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
216; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
217; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
218; NONEON-NOSVE-NEXT:    add w8, w9, w8
219; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
220; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
221; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
222; NONEON-NOSVE-NEXT:    add w8, w9, w8
223; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
224; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
225; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
226; NONEON-NOSVE-NEXT:    add w8, w9, w8
227; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
228; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
229; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
230; NONEON-NOSVE-NEXT:    add w8, w9, w8
231; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
232; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
233; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
234; NONEON-NOSVE-NEXT:    add w8, w9, w8
235; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
236; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
237; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
238; NONEON-NOSVE-NEXT:    add w8, w9, w8
239; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
240; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
241; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
242; NONEON-NOSVE-NEXT:    add w8, w9, w8
243; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
244; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
245; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
246; NONEON-NOSVE-NEXT:    add w8, w9, w8
247; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
248; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
249; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
250; NONEON-NOSVE-NEXT:    add w8, w9, w8
251; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
252; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
253; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
254; NONEON-NOSVE-NEXT:    add w8, w9, w8
255; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
256; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
257; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
258; NONEON-NOSVE-NEXT:    add w8, w9, w8
259; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
260; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
261; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
262; NONEON-NOSVE-NEXT:    add w8, w9, w8
263; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
264; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
265; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
266; NONEON-NOSVE-NEXT:    add w8, w9, w8
267; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
268; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
269; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
270; NONEON-NOSVE-NEXT:    add w8, w9, w8
271; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
272; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
273; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
274; NONEON-NOSVE-NEXT:    add w8, w9, w8
275; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
276; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
277; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
278; NONEON-NOSVE-NEXT:    add w8, w9, w8
279; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
280; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
281; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
282; NONEON-NOSVE-NEXT:    add w8, w9, w8
283; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
284; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
285; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
286; NONEON-NOSVE-NEXT:    add w8, w9, w8
287; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
288; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
289; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
290; NONEON-NOSVE-NEXT:    add w8, w9, w8
291; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
292; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
293; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
294; NONEON-NOSVE-NEXT:    add w8, w9, w8
295; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
296; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
297; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
298; NONEON-NOSVE-NEXT:    add w8, w9, w8
299; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
300; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
301; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
302; NONEON-NOSVE-NEXT:    add w8, w9, w8
303; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
304; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
305; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
306; NONEON-NOSVE-NEXT:    add w8, w9, w8
307; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
308; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
309; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
310; NONEON-NOSVE-NEXT:    add w8, w9, w8
311; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
312; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
313; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
314; NONEON-NOSVE-NEXT:    add w8, w9, w8
315; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
316; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
317; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
318; NONEON-NOSVE-NEXT:    add w8, w9, w8
319; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
320; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
321; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
322; NONEON-NOSVE-NEXT:    add w8, w9, w8
323; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
324; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
325; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
326; NONEON-NOSVE-NEXT:    add w8, w9, w8
327; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
328; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
329; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
330; NONEON-NOSVE-NEXT:    add w8, w9, w8
331; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
332; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
333; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
334; NONEON-NOSVE-NEXT:    add w8, w9, w8
335; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
336; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
337; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
338; NONEON-NOSVE-NEXT:    add w8, w9, w8
339; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
340; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
341; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
342; NONEON-NOSVE-NEXT:    add sp, sp, #96
343; NONEON-NOSVE-NEXT:    ret
344  %op1 = load <32 x i8>, ptr %a
345  %op2 = load <32 x i8>, ptr %b
346  %res = add <32 x i8> %op1, %op2
347  store <32 x i8> %res, ptr %a
348  ret void
349}
350
; Element-wise add of two <2 x i16> vectors loaded from %a and %b; the sum is
; stored back to %a. The third parameter %c is never referenced in the body.
; The SVE/SME runs (CHECK prefix) expect the halfwords to be loaded into word
; lanes by ld1h under a "ptrue p0.s, vl2" predicate and written back by st1h
; using the same predicate. The run using the NONEON-NOSVE prefix expects only
; scalar ldrh/add/strh instructions operating directly on [x0]/[x1].
351define void @add_v2i16(ptr %a, ptr %b, ptr %c) {
352; CHECK-LABEL: add_v2i16:
353; CHECK:       // %bb.0:
354; CHECK-NEXT:    ptrue p0.s, vl2
355; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
356; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x1]
357; CHECK-NEXT:    add z0.s, z0.s, z1.s
358; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
359; CHECK-NEXT:    ret
360;
361; NONEON-NOSVE-LABEL: add_v2i16:
362; NONEON-NOSVE:       // %bb.0:
363; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
364; NONEON-NOSVE-NEXT:    ldrh w9, [x1]
365; NONEON-NOSVE-NEXT:    ldrh w10, [x0, #2]
366; NONEON-NOSVE-NEXT:    ldrh w11, [x1, #2]
367; NONEON-NOSVE-NEXT:    add w8, w8, w9
368; NONEON-NOSVE-NEXT:    add w9, w10, w11
369; NONEON-NOSVE-NEXT:    strh w8, [x0]
370; NONEON-NOSVE-NEXT:    strh w9, [x0, #2]
371; NONEON-NOSVE-NEXT:    ret
372  %op1 = load <2 x i16>, ptr %a
373  %op2 = load <2 x i16>, ptr %b
374  %res = add <2 x i16> %op1, %op2
375  store <2 x i16> %res, ptr %a
376  ret void
377}
378
; Element-wise add of two <4 x i16> vectors loaded from %a and %b; the sum is
; stored back to %a. The third parameter %c is never referenced in the body.
; The SVE/SME runs (CHECK prefix) expect plain "ldr d"/"str d" accesses and an
; unpredicated "add z0.h", so no ptrue appears in the expected output.
; The run using the NONEON-NOSVE prefix expects both operands to be copied into
; a 32-byte stack frame, added one halfword at a time with ldrh/add/strh, and
; the result reloaded from [sp, #24] before the final store.
379define void @add_v4i16(ptr %a, ptr %b, ptr %c) {
380; CHECK-LABEL: add_v4i16:
381; CHECK:       // %bb.0:
382; CHECK-NEXT:    ldr d0, [x0]
383; CHECK-NEXT:    ldr d1, [x1]
384; CHECK-NEXT:    add z0.h, z0.h, z1.h
385; CHECK-NEXT:    str d0, [x0]
386; CHECK-NEXT:    ret
387;
388; NONEON-NOSVE-LABEL: add_v4i16:
389; NONEON-NOSVE:       // %bb.0:
390; NONEON-NOSVE-NEXT:    sub sp, sp, #32
391; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
392; NONEON-NOSVE-NEXT:    ldr d0, [x1]
393; NONEON-NOSVE-NEXT:    ldr d1, [x0]
394; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #8]
395; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
396; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
397; NONEON-NOSVE-NEXT:    add w8, w9, w8
398; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
399; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
400; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
401; NONEON-NOSVE-NEXT:    add w8, w9, w8
402; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
403; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
404; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
405; NONEON-NOSVE-NEXT:    add w8, w9, w8
406; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
407; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
408; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
409; NONEON-NOSVE-NEXT:    add w8, w9, w8
410; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
411; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
412; NONEON-NOSVE-NEXT:    str d0, [x0]
413; NONEON-NOSVE-NEXT:    add sp, sp, #32
414; NONEON-NOSVE-NEXT:    ret
415  %op1 = load <4 x i16>, ptr %a
416  %op2 = load <4 x i16>, ptr %b
417  %res = add <4 x i16> %op1, %op2
418  store <4 x i16> %res, ptr %a
419  ret void
420}
421
; Element-wise add of two <8 x i16> vectors loaded from %a and %b; the sum is
; stored back to %a. The third parameter %c is never referenced in the body.
; The SVE/SME runs (CHECK prefix) expect plain "ldr q"/"str q" accesses and an
; unpredicated "add z0.h", so no ptrue appears in the expected output.
; The run using the NONEON-NOSVE prefix expects a 48-byte frame created by the
; pre-indexed stp of both operands, a halfword-by-halfword ldrh/add/strh
; sequence, and the result reloaded from [sp, #32] before the final store.
422define void @add_v8i16(ptr %a, ptr %b, ptr %c) {
423; CHECK-LABEL: add_v8i16:
424; CHECK:       // %bb.0:
425; CHECK-NEXT:    ldr q0, [x0]
426; CHECK-NEXT:    ldr q1, [x1]
427; CHECK-NEXT:    add z0.h, z0.h, z1.h
428; CHECK-NEXT:    str q0, [x0]
429; CHECK-NEXT:    ret
430;
431; NONEON-NOSVE-LABEL: add_v8i16:
432; NONEON-NOSVE:       // %bb.0:
433; NONEON-NOSVE-NEXT:    ldr q0, [x1]
434; NONEON-NOSVE-NEXT:    ldr q1, [x0]
435; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
436; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
437; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
438; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
439; NONEON-NOSVE-NEXT:    add w8, w9, w8
440; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
441; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
442; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
443; NONEON-NOSVE-NEXT:    add w8, w9, w8
444; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
445; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
446; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
447; NONEON-NOSVE-NEXT:    add w8, w9, w8
448; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
449; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
450; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
451; NONEON-NOSVE-NEXT:    add w8, w9, w8
452; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
453; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
454; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
455; NONEON-NOSVE-NEXT:    add w8, w9, w8
456; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
457; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
458; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
459; NONEON-NOSVE-NEXT:    add w8, w9, w8
460; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
461; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
462; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
463; NONEON-NOSVE-NEXT:    add w8, w9, w8
464; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
465; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
466; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
467; NONEON-NOSVE-NEXT:    add w8, w9, w8
468; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
469; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
470; NONEON-NOSVE-NEXT:    str q0, [x0]
471; NONEON-NOSVE-NEXT:    add sp, sp, #48
472; NONEON-NOSVE-NEXT:    ret
473  %op1 = load <8 x i16>, ptr %a
474  %op2 = load <8 x i16>, ptr %b
475  %res = add <8 x i16> %op1, %op2
476  store <8 x i16> %res, ptr %a
477  ret void
478}
479
; Element-wise add of two <16 x i16> vectors loaded from %a and %b; the sum is
; stored back to %a. The third parameter %c is never referenced in the body.
; The SVE/SME runs (CHECK prefix) expect the 256-bit operands to be handled as
; two Q-register halves: ldp/stp pairs with two unpredicated "add z.h"
; instructions and no ptrue.
; The run using the NONEON-NOSVE prefix expects a 96-byte stack frame holding
; both operands, a halfword-by-halfword ldrh/add/strh sequence, and the result
; reloaded from [sp, #64] with ldp before the final stp to [x0].
480define void @add_v16i16(ptr %a, ptr %b, ptr %c) {
481; CHECK-LABEL: add_v16i16:
482; CHECK:       // %bb.0:
483; CHECK-NEXT:    ldp q0, q3, [x1]
484; CHECK-NEXT:    ldp q1, q2, [x0]
485; CHECK-NEXT:    add z0.h, z1.h, z0.h
486; CHECK-NEXT:    add z1.h, z2.h, z3.h
487; CHECK-NEXT:    stp q0, q1, [x0]
488; CHECK-NEXT:    ret
489;
490; NONEON-NOSVE-LABEL: add_v16i16:
491; NONEON-NOSVE:       // %bb.0:
492; NONEON-NOSVE-NEXT:    sub sp, sp, #96
493; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
494; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
495; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
496; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
497; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
498; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
499; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
500; NONEON-NOSVE-NEXT:    add w8, w9, w8
501; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
502; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
503; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
504; NONEON-NOSVE-NEXT:    add w8, w9, w8
505; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
506; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
507; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
508; NONEON-NOSVE-NEXT:    add w8, w9, w8
509; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
510; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
511; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
512; NONEON-NOSVE-NEXT:    add w8, w9, w8
513; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
514; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
515; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
516; NONEON-NOSVE-NEXT:    add w8, w9, w8
517; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
518; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
519; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
520; NONEON-NOSVE-NEXT:    add w8, w9, w8
521; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
522; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
523; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
524; NONEON-NOSVE-NEXT:    add w8, w9, w8
525; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
526; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
527; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
528; NONEON-NOSVE-NEXT:    add w8, w9, w8
529; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
530; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
531; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
532; NONEON-NOSVE-NEXT:    add w8, w9, w8
533; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
534; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
535; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
536; NONEON-NOSVE-NEXT:    add w8, w9, w8
537; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
538; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
539; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
540; NONEON-NOSVE-NEXT:    add w8, w9, w8
541; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
542; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
543; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
544; NONEON-NOSVE-NEXT:    add w8, w9, w8
545; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
546; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
547; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
548; NONEON-NOSVE-NEXT:    add w8, w9, w8
549; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
550; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
551; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
552; NONEON-NOSVE-NEXT:    add w8, w9, w8
553; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
554; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
555; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
556; NONEON-NOSVE-NEXT:    add w8, w9, w8
557; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
558; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
559; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
560; NONEON-NOSVE-NEXT:    add w8, w9, w8
561; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
562; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
563; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
564; NONEON-NOSVE-NEXT:    add sp, sp, #96
565; NONEON-NOSVE-NEXT:    ret
566  %op1 = load <16 x i16>, ptr %a
567  %op2 = load <16 x i16>, ptr %b
568  %res = add <16 x i16> %op1, %op2
569  store <16 x i16> %res, ptr %a
570  ret void
571}
572
; In-place absolute value of the <2 x i32> vector at %a, computed with the
; llvm.abs intrinsic (second operand passed as false).
; The SVE/SME runs (CHECK prefix) expect a merging-predicated "abs z0.s, p0/m"
; governed by "ptrue p0.s, vl2", with plain "ldr d"/"str d" memory accesses.
; The run using the NONEON-NOSVE prefix expects the vector to be spilled to a
; 16-byte frame and each lane handled with a scalar cmp #0 / cneg ... mi pair.
573define void @abs_v2i32(ptr %a) {
574; CHECK-LABEL: abs_v2i32:
575; CHECK:       // %bb.0:
576; CHECK-NEXT:    ptrue p0.s, vl2
577; CHECK-NEXT:    ldr d0, [x0]
578; CHECK-NEXT:    abs z0.s, p0/m, z0.s
579; CHECK-NEXT:    str d0, [x0]
580; CHECK-NEXT:    ret
581;
582; NONEON-NOSVE-LABEL: abs_v2i32:
583; NONEON-NOSVE:       // %bb.0:
584; NONEON-NOSVE-NEXT:    ldr d0, [x0]
585; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
586; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
587; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
588; NONEON-NOSVE-NEXT:    cmp w8, #0
589; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
590; NONEON-NOSVE-NEXT:    ldr w8, [sp]
591; NONEON-NOSVE-NEXT:    cmp w8, #0
592; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
593; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8]
594; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
595; NONEON-NOSVE-NEXT:    str d0, [x0]
596; NONEON-NOSVE-NEXT:    add sp, sp, #16
597; NONEON-NOSVE-NEXT:    ret
598  %op1 = load <2 x i32>, ptr %a
599  %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
600  store <2 x i32> %res, ptr %a
601  ret void
602}
603
; In-place absolute value of the <4 x i32> vector at %a, computed with the
; llvm.abs intrinsic (second operand passed as false).
; The SVE/SME runs (CHECK prefix) expect a merging-predicated "abs z0.s, p0/m"
; governed by "ptrue p0.s, vl4", with plain "ldr q"/"str q" memory accesses.
; The run using the NONEON-NOSVE prefix expects the vector to be spilled to a
; 32-byte frame and each lane handled with a scalar cmp #0 / cneg ... mi pair,
; the results being written back two lanes at a time with stp.
604define void @abs_v4i32(ptr %a) {
605; CHECK-LABEL: abs_v4i32:
606; CHECK:       // %bb.0:
607; CHECK-NEXT:    ptrue p0.s, vl4
608; CHECK-NEXT:    ldr q0, [x0]
609; CHECK-NEXT:    abs z0.s, p0/m, z0.s
610; CHECK-NEXT:    str q0, [x0]
611; CHECK-NEXT:    ret
612;
613; NONEON-NOSVE-LABEL: abs_v4i32:
614; NONEON-NOSVE:       // %bb.0:
615; NONEON-NOSVE-NEXT:    ldr q0, [x0]
616; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
617; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
618; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
619; NONEON-NOSVE-NEXT:    cmp w8, #0
620; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
621; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
622; NONEON-NOSVE-NEXT:    cmp w8, #0
623; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
624; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
625; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
626; NONEON-NOSVE-NEXT:    cmp w8, #0
627; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
628; NONEON-NOSVE-NEXT:    ldr w8, [sp]
629; NONEON-NOSVE-NEXT:    cmp w8, #0
630; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
631; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16]
632; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
633; NONEON-NOSVE-NEXT:    str q0, [x0]
634; NONEON-NOSVE-NEXT:    add sp, sp, #32
635; NONEON-NOSVE-NEXT:    ret
636  %op1 = load <4 x i32>, ptr %a
637  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
638  store <4 x i32> %res, ptr %a
639  ret void
640}
641
; In-place absolute value of the <8 x i32> vector at %a, computed with the
; llvm.abs intrinsic (second operand passed as false).
; The SVE/SME runs (CHECK prefix) expect the 256-bit value to be processed as
; two Q-register halves, with a single "ptrue p0.s, vl4" shared by both
; merging-predicated abs instructions.
; The run using the NONEON-NOSVE prefix expects the vector to be spilled to a
; 64-byte frame and each lane handled with a scalar cmp #0 / cneg ... mi pair,
; the results being written back two lanes at a time with stp.
642define void @abs_v8i32(ptr %a) {
643; CHECK-LABEL: abs_v8i32:
644; CHECK:       // %bb.0:
645; CHECK-NEXT:    ldp q0, q1, [x0]
646; CHECK-NEXT:    ptrue p0.s, vl4
647; CHECK-NEXT:    abs z0.s, p0/m, z0.s
648; CHECK-NEXT:    abs z1.s, p0/m, z1.s
649; CHECK-NEXT:    stp q0, q1, [x0]
650; CHECK-NEXT:    ret
651;
652; NONEON-NOSVE-LABEL: abs_v8i32:
653; NONEON-NOSVE:       // %bb.0:
654; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
655; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
656; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
657; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
658; NONEON-NOSVE-NEXT:    cmp w8, #0
659; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
660; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
661; NONEON-NOSVE-NEXT:    cmp w8, #0
662; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
663; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56]
664; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
665; NONEON-NOSVE-NEXT:    cmp w8, #0
666; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
667; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
668; NONEON-NOSVE-NEXT:    cmp w8, #0
669; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
670; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48]
671; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12]
672; NONEON-NOSVE-NEXT:    cmp w8, #0
673; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
674; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8]
675; NONEON-NOSVE-NEXT:    cmp w8, #0
676; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
677; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
678; NONEON-NOSVE-NEXT:    ldr w8, [sp, #4]
679; NONEON-NOSVE-NEXT:    cmp w8, #0
680; NONEON-NOSVE-NEXT:    cneg w9, w8, mi
681; NONEON-NOSVE-NEXT:    ldr w8, [sp]
682; NONEON-NOSVE-NEXT:    cmp w8, #0
683; NONEON-NOSVE-NEXT:    cneg w8, w8, mi
684; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32]
685; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
686; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
687; NONEON-NOSVE-NEXT:    add sp, sp, #64
688; NONEON-NOSVE-NEXT:    ret
689  %op1 = load <8 x i32>, ptr %a
690  %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
691  store <8 x i32> %res, ptr %a
692  ret void
693}
694
; In-place absolute value of the <2 x i64> vector at %a, computed with the
; llvm.abs intrinsic (second operand passed as false).
; The SVE/SME runs (CHECK prefix) expect a merging-predicated "abs z0.d, p0/m"
; governed by "ptrue p0.d, vl2", with plain "ldr q"/"str q" memory accesses.
; The run using the NONEON-NOSVE prefix expects the vector to be spilled to a
; 32-byte frame and each 64-bit lane handled with a scalar
; cmp #0 / cneg ... mi pair on X registers.
695define void @abs_v2i64(ptr %a) {
696; CHECK-LABEL: abs_v2i64:
697; CHECK:       // %bb.0:
698; CHECK-NEXT:    ptrue p0.d, vl2
699; CHECK-NEXT:    ldr q0, [x0]
700; CHECK-NEXT:    abs z0.d, p0/m, z0.d
701; CHECK-NEXT:    str q0, [x0]
702; CHECK-NEXT:    ret
703;
704; NONEON-NOSVE-LABEL: abs_v2i64:
705; NONEON-NOSVE:       // %bb.0:
706; NONEON-NOSVE-NEXT:    ldr q0, [x0]
707; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
708; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
709; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
710; NONEON-NOSVE-NEXT:    cmp x8, #0
711; NONEON-NOSVE-NEXT:    cneg x9, x8, mi
712; NONEON-NOSVE-NEXT:    ldr x8, [sp]
713; NONEON-NOSVE-NEXT:    cmp x8, #0
714; NONEON-NOSVE-NEXT:    cneg x8, x8, mi
715; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #16]
716; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
717; NONEON-NOSVE-NEXT:    str q0, [x0]
718; NONEON-NOSVE-NEXT:    add sp, sp, #32
719; NONEON-NOSVE-NEXT:    ret
720  %op1 = load <2 x i64>, ptr %a
721  %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
722  store <2 x i64> %res, ptr %a
723  ret void
724}
725
; In-place absolute value of the <4 x i64> vector at %a, computed with the
; llvm.abs intrinsic (second operand passed as false).
; The SVE/SME runs (CHECK prefix) expect the 256-bit value to be processed as
; two Q-register halves, with a single "ptrue p0.d, vl2" shared by both
; merging-predicated abs instructions.
; The run using the NONEON-NOSVE prefix expects the vector to be spilled to a
; 64-byte frame and each 64-bit lane handled with a scalar
; cmp #0 / cneg ... mi pair on X registers.
726define void @abs_v4i64(ptr %a) {
727; CHECK-LABEL: abs_v4i64:
728; CHECK:       // %bb.0:
729; CHECK-NEXT:    ldp q0, q1, [x0]
730; CHECK-NEXT:    ptrue p0.d, vl2
731; CHECK-NEXT:    abs z0.d, p0/m, z0.d
732; CHECK-NEXT:    abs z1.d, p0/m, z1.d
733; CHECK-NEXT:    stp q0, q1, [x0]
734; CHECK-NEXT:    ret
735;
736; NONEON-NOSVE-LABEL: abs_v4i64:
737; NONEON-NOSVE:       // %bb.0:
738; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
739; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
740; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
741; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
742; NONEON-NOSVE-NEXT:    cmp x8, #0
743; NONEON-NOSVE-NEXT:    cneg x9, x8, mi
744; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
745; NONEON-NOSVE-NEXT:    cmp x8, #0
746; NONEON-NOSVE-NEXT:    cneg x8, x8, mi
747; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
748; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
749; NONEON-NOSVE-NEXT:    cmp x8, #0
750; NONEON-NOSVE-NEXT:    cneg x9, x8, mi
751; NONEON-NOSVE-NEXT:    ldr x8, [sp]
752; NONEON-NOSVE-NEXT:    cmp x8, #0
753; NONEON-NOSVE-NEXT:    cneg x8, x8, mi
754; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #32]
755; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
756; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
757; NONEON-NOSVE-NEXT:    add sp, sp, #64
758; NONEON-NOSVE-NEXT:    ret
759  %op1 = load <4 x i64>, ptr %a
760  %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
761  store <4 x i64> %res, ptr %a
762  ret void
763}
764
; Element-wise fadd of two <2 x half> vectors loaded from %a and %b; the sum is
; stored back to %a.
; The SVE/SME runs (CHECK prefix) expect 32-bit "ldr s" loads, a
; merging-predicated "fadd z0.h, p0/m" governed by "ptrue p0.h, vl4" (four
; halfword lanes, although only two are loaded), and the result moved to w8
; with fmov before a 32-bit store.
; The run using the NONEON-NOSVE prefix expects the operands to be staged
; through a 48-byte stack frame, each half converted to single precision with
; fcvt, added with a scalar fadd, converted back to half, and the low 32 bits
; of the reassembled result stored to [x0].
765define void @fadd_v2f16(ptr %a, ptr %b) {
766; CHECK-LABEL: fadd_v2f16:
767; CHECK:       // %bb.0:
768; CHECK-NEXT:    ptrue p0.h, vl4
769; CHECK-NEXT:    ldr s0, [x0]
770; CHECK-NEXT:    ldr s1, [x1]
771; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
772; CHECK-NEXT:    fmov w8, s0
773; CHECK-NEXT:    str w8, [x0]
774; CHECK-NEXT:    ret
775;
776; NONEON-NOSVE-LABEL: fadd_v2f16:
777; NONEON-NOSVE:       // %bb.0:
778; NONEON-NOSVE-NEXT:    ldr w8, [x0]
779; NONEON-NOSVE-NEXT:    str w8, [sp, #-48]!
780; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
781; NONEON-NOSVE-NEXT:    ldr w8, [x1]
782; NONEON-NOSVE-NEXT:    str w8, [sp, #8]
783; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp]
784; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
785; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
786; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
787; NONEON-NOSVE-NEXT:    fcvt s0, h0
788; NONEON-NOSVE-NEXT:    fcvt s1, h1
789; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
790; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
791; NONEON-NOSVE-NEXT:    fcvt s1, h1
792; NONEON-NOSVE-NEXT:    fcvt h0, s0
793; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
794; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
795; NONEON-NOSVE-NEXT:    fcvt s0, h0
796; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
797; NONEON-NOSVE-NEXT:    fcvt h0, s0
798; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
799; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
800; NONEON-NOSVE-NEXT:    str d0, [sp, #40]
801; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
802; NONEON-NOSVE-NEXT:    str w8, [x0]
803; NONEON-NOSVE-NEXT:    add sp, sp, #48
804; NONEON-NOSVE-NEXT:    ret
805  %op1 = load <2 x half>, ptr %a
806  %op2 = load <2 x half>, ptr %b
807  %res = fadd <2 x half> %op1, %op2
808  store <2 x half> %res, ptr %a
809  ret void
810}
811
; fadd_v4f16: element-wise fadd of two <4 x half> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the expected code is one halfword predicate (vl4), two 64-bit
; loads, a single predicated fadd and a 64-bit store.
; Without SVE or NEON the operands are spilled to the stack and each of the
; four lanes is widened to single precision, added and narrowed back to half.
812define void @fadd_v4f16(ptr %a, ptr %b) {
813; CHECK-LABEL: fadd_v4f16:
814; CHECK:       // %bb.0:
815; CHECK-NEXT:    ptrue p0.h, vl4
816; CHECK-NEXT:    ldr d0, [x0]
817; CHECK-NEXT:    ldr d1, [x1]
818; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
819; CHECK-NEXT:    str d0, [x0]
820; CHECK-NEXT:    ret
821;
822; NONEON-NOSVE-LABEL: fadd_v4f16:
823; NONEON-NOSVE:       // %bb.0:
824; NONEON-NOSVE-NEXT:    sub sp, sp, #32
825; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
826; NONEON-NOSVE-NEXT:    ldr d0, [x1]
827; NONEON-NOSVE-NEXT:    ldr d1, [x0]
828; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #8]
829; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
830; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
831; NONEON-NOSVE-NEXT:    fcvt s0, h0
832; NONEON-NOSVE-NEXT:    fcvt s1, h1
833; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
834; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
835; NONEON-NOSVE-NEXT:    fcvt s1, h1
836; NONEON-NOSVE-NEXT:    fcvt h0, s0
837; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
838; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
839; NONEON-NOSVE-NEXT:    fcvt s0, h0
840; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
841; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
842; NONEON-NOSVE-NEXT:    fcvt s1, h1
843; NONEON-NOSVE-NEXT:    fcvt h0, s0
844; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
845; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
846; NONEON-NOSVE-NEXT:    fcvt s0, h0
847; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
848; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
849; NONEON-NOSVE-NEXT:    fcvt s1, h1
850; NONEON-NOSVE-NEXT:    fcvt h0, s0
851; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
852; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
853; NONEON-NOSVE-NEXT:    fcvt s0, h0
854; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
855; NONEON-NOSVE-NEXT:    fcvt h0, s0
856; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
857; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
858; NONEON-NOSVE-NEXT:    str d0, [x0]
859; NONEON-NOSVE-NEXT:    add sp, sp, #32
860; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
861  %op1 = load <4 x half>, ptr %a
862  %op2 = load <4 x half>, ptr %b
863  %res = fadd <4 x half> %op1, %op2
864  store <4 x half> %res, ptr %a
865  ret void
866}
867
; fadd_v8f16: element-wise fadd of two <8 x half> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the expected code is one halfword predicate (vl8), two 128-bit
; loads, a single predicated fadd and a 128-bit store.
; Without SVE or NEON both operands are pushed to the stack and each of the
; eight lanes is widened to single precision, added and narrowed back to half.
868define void @fadd_v8f16(ptr %a, ptr %b) {
869; CHECK-LABEL: fadd_v8f16:
870; CHECK:       // %bb.0:
871; CHECK-NEXT:    ptrue p0.h, vl8
872; CHECK-NEXT:    ldr q0, [x0]
873; CHECK-NEXT:    ldr q1, [x1]
874; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
875; CHECK-NEXT:    str q0, [x0]
876; CHECK-NEXT:    ret
877;
878; NONEON-NOSVE-LABEL: fadd_v8f16:
879; NONEON-NOSVE:       // %bb.0:
880; NONEON-NOSVE-NEXT:    ldr q0, [x1]
881; NONEON-NOSVE-NEXT:    ldr q1, [x0]
882; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
883; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
884; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
885; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
886; NONEON-NOSVE-NEXT:    fcvt s0, h0
887; NONEON-NOSVE-NEXT:    fcvt s1, h1
888; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
889; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
890; NONEON-NOSVE-NEXT:    fcvt s1, h1
891; NONEON-NOSVE-NEXT:    fcvt h0, s0
892; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
893; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
894; NONEON-NOSVE-NEXT:    fcvt s0, h0
895; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
896; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
897; NONEON-NOSVE-NEXT:    fcvt s1, h1
898; NONEON-NOSVE-NEXT:    fcvt h0, s0
899; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
900; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
901; NONEON-NOSVE-NEXT:    fcvt s0, h0
902; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
903; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
904; NONEON-NOSVE-NEXT:    fcvt s1, h1
905; NONEON-NOSVE-NEXT:    fcvt h0, s0
906; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
907; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
908; NONEON-NOSVE-NEXT:    fcvt s0, h0
909; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
910; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
911; NONEON-NOSVE-NEXT:    fcvt s1, h1
912; NONEON-NOSVE-NEXT:    fcvt h0, s0
913; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
914; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
915; NONEON-NOSVE-NEXT:    fcvt s0, h0
916; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
917; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
918; NONEON-NOSVE-NEXT:    fcvt s1, h1
919; NONEON-NOSVE-NEXT:    fcvt h0, s0
920; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
921; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
922; NONEON-NOSVE-NEXT:    fcvt s0, h0
923; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
924; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
925; NONEON-NOSVE-NEXT:    fcvt s1, h1
926; NONEON-NOSVE-NEXT:    fcvt h0, s0
927; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
928; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
929; NONEON-NOSVE-NEXT:    fcvt s0, h0
930; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
931; NONEON-NOSVE-NEXT:    ldr h1, [sp]
932; NONEON-NOSVE-NEXT:    fcvt s1, h1
933; NONEON-NOSVE-NEXT:    fcvt h0, s0
934; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
935; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
936; NONEON-NOSVE-NEXT:    fcvt s0, h0
937; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
938; NONEON-NOSVE-NEXT:    fcvt h0, s0
939; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
940; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
941; NONEON-NOSVE-NEXT:    str q0, [x0]
942; NONEON-NOSVE-NEXT:    add sp, sp, #48
943; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
944  %op1 = load <8 x half>, ptr %a
945  %op2 = load <8 x half>, ptr %b
946  %res = fadd <8 x half> %op1, %op2
947  store <8 x half> %res, ptr %a
948  ret void
949}
950
; fadd_v16f16: element-wise fadd of two <16 x half> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the 256-bit operation is done as two 128-bit halves; the
; expected code materialises a single halfword predicate (vl8) that both
; predicated fadds reuse, with a movprfx ahead of the second fadd.
; Without SVE or NEON the operands are spilled into a 96-byte frame and each of
; the sixteen lanes is widened to single precision, added and narrowed back.
951define void @fadd_v16f16(ptr %a, ptr %b) {
952; CHECK-LABEL: fadd_v16f16:
953; CHECK:       // %bb.0:
954; CHECK-NEXT:    ldp q0, q3, [x1]
955; CHECK-NEXT:    ptrue p0.h, vl8
956; CHECK-NEXT:    ldp q1, q2, [x0]
957; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
958; CHECK-NEXT:    movprfx z1, z2
959; CHECK-NEXT:    fadd z1.h, p0/m, z1.h, z3.h
960; CHECK-NEXT:    stp q0, q1, [x0]
961; CHECK-NEXT:    ret
962;
963; NONEON-NOSVE-LABEL: fadd_v16f16:
964; NONEON-NOSVE:       // %bb.0:
965; NONEON-NOSVE-NEXT:    sub sp, sp, #96
966; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
967; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
968; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
969; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
970; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
971; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
972; NONEON-NOSVE-NEXT:    ldr h1, [sp, #46]
973; NONEON-NOSVE-NEXT:    fcvt s0, h0
974; NONEON-NOSVE-NEXT:    fcvt s1, h1
975; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
976; NONEON-NOSVE-NEXT:    ldr h1, [sp, #44]
977; NONEON-NOSVE-NEXT:    fcvt s1, h1
978; NONEON-NOSVE-NEXT:    fcvt h0, s0
979; NONEON-NOSVE-NEXT:    str h0, [sp, #94]
980; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
981; NONEON-NOSVE-NEXT:    fcvt s0, h0
982; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
983; NONEON-NOSVE-NEXT:    ldr h1, [sp, #42]
984; NONEON-NOSVE-NEXT:    fcvt s1, h1
985; NONEON-NOSVE-NEXT:    fcvt h0, s0
986; NONEON-NOSVE-NEXT:    str h0, [sp, #92]
987; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
988; NONEON-NOSVE-NEXT:    fcvt s0, h0
989; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
990; NONEON-NOSVE-NEXT:    ldr h1, [sp, #40]
991; NONEON-NOSVE-NEXT:    fcvt s1, h1
992; NONEON-NOSVE-NEXT:    fcvt h0, s0
993; NONEON-NOSVE-NEXT:    str h0, [sp, #90]
994; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
995; NONEON-NOSVE-NEXT:    fcvt s0, h0
996; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
997; NONEON-NOSVE-NEXT:    ldr h1, [sp, #38]
998; NONEON-NOSVE-NEXT:    fcvt s1, h1
999; NONEON-NOSVE-NEXT:    fcvt h0, s0
1000; NONEON-NOSVE-NEXT:    str h0, [sp, #88]
1001; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
1002; NONEON-NOSVE-NEXT:    fcvt s0, h0
1003; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1004; NONEON-NOSVE-NEXT:    ldr h1, [sp, #36]
1005; NONEON-NOSVE-NEXT:    fcvt s1, h1
1006; NONEON-NOSVE-NEXT:    fcvt h0, s0
1007; NONEON-NOSVE-NEXT:    str h0, [sp, #86]
1008; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
1009; NONEON-NOSVE-NEXT:    fcvt s0, h0
1010; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1011; NONEON-NOSVE-NEXT:    ldr h1, [sp, #34]
1012; NONEON-NOSVE-NEXT:    fcvt s1, h1
1013; NONEON-NOSVE-NEXT:    fcvt h0, s0
1014; NONEON-NOSVE-NEXT:    str h0, [sp, #84]
1015; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
1016; NONEON-NOSVE-NEXT:    fcvt s0, h0
1017; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1018; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
1019; NONEON-NOSVE-NEXT:    fcvt s1, h1
1020; NONEON-NOSVE-NEXT:    fcvt h0, s0
1021; NONEON-NOSVE-NEXT:    str h0, [sp, #82]
1022; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
1023; NONEON-NOSVE-NEXT:    fcvt s0, h0
1024; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1025; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1026; NONEON-NOSVE-NEXT:    fcvt s1, h1
1027; NONEON-NOSVE-NEXT:    fcvt h0, s0
1028; NONEON-NOSVE-NEXT:    str h0, [sp, #80]
1029; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
1030; NONEON-NOSVE-NEXT:    fcvt s0, h0
1031; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1032; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1033; NONEON-NOSVE-NEXT:    fcvt s1, h1
1034; NONEON-NOSVE-NEXT:    fcvt h0, s0
1035; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
1036; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
1037; NONEON-NOSVE-NEXT:    fcvt s0, h0
1038; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1039; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1040; NONEON-NOSVE-NEXT:    fcvt s1, h1
1041; NONEON-NOSVE-NEXT:    fcvt h0, s0
1042; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
1043; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
1044; NONEON-NOSVE-NEXT:    fcvt s0, h0
1045; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1046; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1047; NONEON-NOSVE-NEXT:    fcvt s1, h1
1048; NONEON-NOSVE-NEXT:    fcvt h0, s0
1049; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
1050; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
1051; NONEON-NOSVE-NEXT:    fcvt s0, h0
1052; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1053; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1054; NONEON-NOSVE-NEXT:    fcvt s1, h1
1055; NONEON-NOSVE-NEXT:    fcvt h0, s0
1056; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
1057; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1058; NONEON-NOSVE-NEXT:    fcvt s0, h0
1059; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1060; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1061; NONEON-NOSVE-NEXT:    fcvt s1, h1
1062; NONEON-NOSVE-NEXT:    fcvt h0, s0
1063; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
1064; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1065; NONEON-NOSVE-NEXT:    fcvt s0, h0
1066; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1067; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1068; NONEON-NOSVE-NEXT:    fcvt s1, h1
1069; NONEON-NOSVE-NEXT:    fcvt h0, s0
1070; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
1071; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1072; NONEON-NOSVE-NEXT:    fcvt s0, h0
1073; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1074; NONEON-NOSVE-NEXT:    ldr h1, [sp]
1075; NONEON-NOSVE-NEXT:    fcvt s1, h1
1076; NONEON-NOSVE-NEXT:    fcvt h0, s0
1077; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
1078; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1079; NONEON-NOSVE-NEXT:    fcvt s0, h0
1080; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1081; NONEON-NOSVE-NEXT:    fcvt h0, s0
1082; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
1083; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1084; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1085; NONEON-NOSVE-NEXT:    add sp, sp, #96
1086; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1087  %op1 = load <16 x half>, ptr %a
1088  %op2 = load <16 x half>, ptr %b
1089  %res = fadd <16 x half> %op1, %op2
1090  store <16 x half> %res, ptr %a
1091  ret void
1092}
1093
; fadd_v2f32: element-wise fadd of two <2 x float> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the expected code is one word predicate (vl2), two 64-bit loads,
; a single predicated fadd and a 64-bit store.
; Without SVE or NEON the operands are spilled to the stack and the two lanes
; are added with scalar single-precision fadds.
1094define void @fadd_v2f32(ptr %a, ptr %b) {
1095; CHECK-LABEL: fadd_v2f32:
1096; CHECK:       // %bb.0:
1097; CHECK-NEXT:    ptrue p0.s, vl2
1098; CHECK-NEXT:    ldr d0, [x0]
1099; CHECK-NEXT:    ldr d1, [x1]
1100; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
1101; CHECK-NEXT:    str d0, [x0]
1102; CHECK-NEXT:    ret
1103;
1104; NONEON-NOSVE-LABEL: fadd_v2f32:
1105; NONEON-NOSVE:       // %bb.0:
1106; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1107; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1108; NONEON-NOSVE-NEXT:    ldr d0, [x1]
1109; NONEON-NOSVE-NEXT:    ldr d1, [x0]
1110; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #8]
1111; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
1112; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
1113; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1114; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
1115; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1116; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
1117; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1118; NONEON-NOSVE-NEXT:    str d0, [x0]
1119; NONEON-NOSVE-NEXT:    add sp, sp, #32
1120; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1121  %op1 = load <2 x float>, ptr %a
1122  %op2 = load <2 x float>, ptr %b
1123  %res = fadd <2 x float> %op1, %op2
1124  store <2 x float> %res, ptr %a
1125  ret void
1126}
1127
; fadd_v4f32: element-wise fadd of two <4 x float> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the expected code is one word predicate (vl4), two 128-bit
; loads, a single predicated fadd and a 128-bit store.
; Without SVE or NEON both operands are pushed to the stack and the four lanes
; are added with scalar single-precision fadds.
1128define void @fadd_v4f32(ptr %a, ptr %b) {
1129; CHECK-LABEL: fadd_v4f32:
1130; CHECK:       // %bb.0:
1131; CHECK-NEXT:    ptrue p0.s, vl4
1132; CHECK-NEXT:    ldr q0, [x0]
1133; CHECK-NEXT:    ldr q1, [x1]
1134; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
1135; CHECK-NEXT:    str q0, [x0]
1136; CHECK-NEXT:    ret
1137;
1138; NONEON-NOSVE-LABEL: fadd_v4f32:
1139; NONEON-NOSVE:       // %bb.0:
1140; NONEON-NOSVE-NEXT:    ldr q0, [x1]
1141; NONEON-NOSVE-NEXT:    ldr q1, [x0]
1142; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
1143; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1144; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
1145; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
1146; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1147; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
1148; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1149; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
1150; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #40]
1151; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
1152; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1153; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
1154; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1155; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #32]
1156; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1157; NONEON-NOSVE-NEXT:    str q0, [x0]
1158; NONEON-NOSVE-NEXT:    add sp, sp, #48
1159; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1160  %op1 = load <4 x float>, ptr %a
1161  %op2 = load <4 x float>, ptr %b
1162  %res = fadd <4 x float> %op1, %op2
1163  store <4 x float> %res, ptr %a
1164  ret void
1165}
1166
; fadd_v8f32: element-wise fadd of two <8 x float> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the 256-bit operation is done as two 128-bit halves; the
; expected code materialises a single word predicate (vl4) that both predicated
; fadds reuse, with a movprfx ahead of the second fadd.
; Without SVE or NEON the operands are spilled into a 96-byte frame and the
; eight lanes are added with scalar single-precision fadds.
1167define void @fadd_v8f32(ptr %a, ptr %b) {
1168; CHECK-LABEL: fadd_v8f32:
1169; CHECK:       // %bb.0:
1170; CHECK-NEXT:    ldp q0, q3, [x1]
1171; CHECK-NEXT:    ptrue p0.s, vl4
1172; CHECK-NEXT:    ldp q1, q2, [x0]
1173; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
1174; CHECK-NEXT:    movprfx z1, z2
1175; CHECK-NEXT:    fadd z1.s, p0/m, z1.s, z3.s
1176; CHECK-NEXT:    stp q0, q1, [x0]
1177; CHECK-NEXT:    ret
1178;
1179; NONEON-NOSVE-LABEL: fadd_v8f32:
1180; NONEON-NOSVE:       // %bb.0:
1181; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1182; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1183; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1184; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1185; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1186; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1187; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #40]
1188; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
1189; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1190; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
1191; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1192; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #32]
1193; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #88]
1194; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
1195; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1196; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
1197; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1198; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
1199; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #80]
1200; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
1201; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1202; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
1203; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1204; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
1205; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #72]
1206; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
1207; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
1208; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
1209; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
1210; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #64]
1211; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1212; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1213; NONEON-NOSVE-NEXT:    add sp, sp, #96
1214; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1215  %op1 = load <8 x float>, ptr %a
1216  %op2 = load <8 x float>, ptr %b
1217  %res = fadd <8 x float> %op1, %op2
1218  store <8 x float> %res, ptr %a
1219  ret void
1220}
1221
; fadd_v2f64: element-wise fadd of two <2 x double> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the expected code is one doubleword predicate (vl2), two 128-bit
; loads, a single predicated fadd and a 128-bit store.
; Without SVE or NEON both operands are pushed to the stack and the two lanes
; are added with scalar double-precision fadds.
1222define void @fadd_v2f64(ptr %a, ptr %b) {
1223; CHECK-LABEL: fadd_v2f64:
1224; CHECK:       // %bb.0:
1225; CHECK-NEXT:    ptrue p0.d, vl2
1226; CHECK-NEXT:    ldr q0, [x0]
1227; CHECK-NEXT:    ldr q1, [x1]
1228; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
1229; CHECK-NEXT:    str q0, [x0]
1230; CHECK-NEXT:    ret
1231;
1232; NONEON-NOSVE-LABEL: fadd_v2f64:
1233; NONEON-NOSVE:       // %bb.0:
1234; NONEON-NOSVE-NEXT:    ldr q0, [x1]
1235; NONEON-NOSVE-NEXT:    ldr q1, [x0]
1236; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
1237; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1238; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
1239; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1240; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
1241; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
1242; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
1243; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #32]
1244; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1245; NONEON-NOSVE-NEXT:    str q0, [x0]
1246; NONEON-NOSVE-NEXT:    add sp, sp, #48
1247; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1248  %op1 = load <2 x double>, ptr %a
1249  %op2 = load <2 x double>, ptr %b
1250  %res = fadd <2 x double> %op1, %op2
1251  store <2 x double> %res, ptr %a
1252  ret void
1253}
1254
; fadd_v4f64: element-wise fadd of two <4 x double> vectors loaded from %a and
; %b, with the sum stored back to %a.
; With SVE/SME the 256-bit operation is done as two 128-bit halves; the
; expected code materialises a single doubleword predicate (vl2) that both
; predicated fadds reuse, with a movprfx ahead of the second fadd.
; Without SVE or NEON the operands are spilled into a 96-byte frame and the
; four lanes are added with scalar double-precision fadds.
1255define void @fadd_v4f64(ptr %a, ptr %b) {
1256; CHECK-LABEL: fadd_v4f64:
1257; CHECK:       // %bb.0:
1258; CHECK-NEXT:    ldp q0, q3, [x1]
1259; CHECK-NEXT:    ptrue p0.d, vl2
1260; CHECK-NEXT:    ldp q1, q2, [x0]
1261; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
1262; CHECK-NEXT:    movprfx z1, z2
1263; CHECK-NEXT:    fadd z1.d, p0/m, z1.d, z3.d
1264; CHECK-NEXT:    stp q0, q1, [x0]
1265; CHECK-NEXT:    ret
1266;
1267; NONEON-NOSVE-LABEL: fadd_v4f64:
1268; NONEON-NOSVE:       // %bb.0:
1269; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1270; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1271; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1272; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1273; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1274; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1275; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #32]
1276; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
1277; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
1278; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
1279; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
1280; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
1281; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #80]
1282; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1283; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
1284; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
1285; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
1286; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
1287; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1288; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1289; NONEON-NOSVE-NEXT:    add sp, sp, #96
1290; NONEON-NOSVE-NEXT:    ret
; Test body: the sum overwrites the first operand in place at %a.
1291  %op1 = load <4 x double>, ptr %a
1292  %op2 = load <4 x double>, ptr %b
1293  %res = fadd <4 x double> %op1, %op2
1294  store <4 x double> %res, ptr %a
1295  ret void
1296}
1297
; Declarations of the llvm.abs vector intrinsics. The v4i64 variant is called
; by the abs test that precedes the fadd tests; the other element counts and
; widths are presumably used by abs tests earlier in this file (confirm there).
1298declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
1299declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
1300declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
1301declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
1302declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
1303