xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-div-expand.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
4
; Unsigned division of two <4 x i32> vectors.
; The expected output moves the lanes into core registers, performs four
; scalar `udiv` instructions (one per lane), and rebuilds q0 with two
; paired-lane `vmov` instructions.
5define arm_aapcs_vfpcc <4 x i32> @udiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
6; CHECK-LABEL: udiv_i32:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    .save {r4, r5, r7, lr}
9; CHECK-NEXT:    push {r4, r5, r7, lr}
10; CHECK-NEXT:    vmov r0, r12, d3
11; CHECK-NEXT:    vmov r2, lr, d1
12; CHECK-NEXT:    vmov r1, r3, d2
13; CHECK-NEXT:    udiv r0, r2, r0
14; CHECK-NEXT:    vmov r4, r5, d0
15; CHECK-NEXT:    udiv r1, r4, r1
16; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
17; CHECK-NEXT:    udiv r0, lr, r12
18; CHECK-NEXT:    udiv r1, r5, r3
19; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
20; CHECK-NEXT:    pop {r4, r5, r7, pc}
21entry:
22  %out = udiv <4 x i32> %in1, %in2
23  ret <4 x i32> %out
24}
25
; Signed division of two <4 x i32> vectors.
; The expected output moves the lanes into core registers, performs four
; scalar `sdiv` instructions (one per lane), and rebuilds q0 with two
; paired-lane `vmov` instructions.
26define arm_aapcs_vfpcc <4 x i32> @sdiv_i32(<4 x i32> %in1, <4 x i32> %in2) {
27; CHECK-LABEL: sdiv_i32:
28; CHECK:       @ %bb.0: @ %entry
29; CHECK-NEXT:    .save {r4, r5, r7, lr}
30; CHECK-NEXT:    push {r4, r5, r7, lr}
31; CHECK-NEXT:    vmov r0, r12, d3
32; CHECK-NEXT:    vmov r2, lr, d1
33; CHECK-NEXT:    vmov r1, r3, d2
34; CHECK-NEXT:    sdiv r0, r2, r0
35; CHECK-NEXT:    vmov r4, r5, d0
36; CHECK-NEXT:    sdiv r1, r4, r1
37; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
38; CHECK-NEXT:    sdiv r0, lr, r12
39; CHECK-NEXT:    sdiv r1, r5, r3
40; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
41; CHECK-NEXT:    pop {r4, r5, r7, pc}
42entry:
43  %out = sdiv <4 x i32> %in1, %in2
44  ret <4 x i32> %out
45}
46
; Unsigned remainder of two <4 x i32> vectors.
; The expected output computes each lane in core registers as a `udiv`
; followed by an `mls` (dividend - quotient * divisor), then rebuilds q0
; with two paired-lane `vmov` instructions.
47define arm_aapcs_vfpcc <4 x i32> @urem_i32(<4 x i32> %in1, <4 x i32> %in2) {
48; CHECK-LABEL: urem_i32:
49; CHECK:       @ %bb.0: @ %entry
50; CHECK-NEXT:    .save {r4, r5, r7, lr}
51; CHECK-NEXT:    push {r4, r5, r7, lr}
52; CHECK-NEXT:    vmov r0, r12, d3
53; CHECK-NEXT:    vmov r2, r3, d1
54; CHECK-NEXT:    vmov r1, lr, d2
55; CHECK-NEXT:    udiv r4, r2, r0
56; CHECK-NEXT:    mls r0, r4, r0, r2
57; CHECK-NEXT:    vmov r2, r4, d0
58; CHECK-NEXT:    udiv r5, r2, r1
59; CHECK-NEXT:    mls r1, r5, r1, r2
60; CHECK-NEXT:    udiv r2, r3, r12
61; CHECK-NEXT:    mls r2, r2, r12, r3
62; CHECK-NEXT:    udiv r3, r4, lr
63; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
64; CHECK-NEXT:    mls r3, r3, lr, r4
65; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
66; CHECK-NEXT:    pop {r4, r5, r7, pc}
67entry:
68  %out = urem <4 x i32> %in1, %in2
69  ret <4 x i32> %out
70}
71
; Signed remainder of two <4 x i32> vectors.
; The expected output computes each lane in core registers as an `sdiv`
; followed by an `mls` (dividend - quotient * divisor), then rebuilds q0
; with two paired-lane `vmov` instructions.
72define arm_aapcs_vfpcc <4 x i32> @srem_i32(<4 x i32> %in1, <4 x i32> %in2) {
73; CHECK-LABEL: srem_i32:
74; CHECK:       @ %bb.0: @ %entry
75; CHECK-NEXT:    .save {r4, r5, r7, lr}
76; CHECK-NEXT:    push {r4, r5, r7, lr}
77; CHECK-NEXT:    vmov r0, r12, d3
78; CHECK-NEXT:    vmov r2, r3, d1
79; CHECK-NEXT:    vmov r1, lr, d2
80; CHECK-NEXT:    sdiv r4, r2, r0
81; CHECK-NEXT:    mls r0, r4, r0, r2
82; CHECK-NEXT:    vmov r2, r4, d0
83; CHECK-NEXT:    sdiv r5, r2, r1
84; CHECK-NEXT:    mls r1, r5, r1, r2
85; CHECK-NEXT:    sdiv r2, r3, r12
86; CHECK-NEXT:    mls r2, r2, r12, r3
87; CHECK-NEXT:    sdiv r3, r4, lr
88; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
89; CHECK-NEXT:    mls r3, r3, lr, r4
90; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
91; CHECK-NEXT:    pop {r4, r5, r7, pc}
92entry:
93  %out = srem <4 x i32> %in1, %in2
94  ret <4 x i32> %out
95}
96
97
; Unsigned division of two <8 x i16> vectors.
; The expected output extracts each of the eight lanes with `vmov.u16`,
; divides with a scalar `udiv`, inserts the quotient into q2 with
; `vmov.16`, and finally copies q2 to q0.
98define arm_aapcs_vfpcc <8 x i16> @udiv_i16(<8 x i16> %in1, <8 x i16> %in2) {
99; CHECK-LABEL: udiv_i16:
100; CHECK:       @ %bb.0: @ %entry
101; CHECK-NEXT:    vmov.u16 r0, q1[0]
102; CHECK-NEXT:    vmov.u16 r1, q0[0]
103; CHECK-NEXT:    udiv r0, r1, r0
104; CHECK-NEXT:    vmov.u16 r1, q1[1]
105; CHECK-NEXT:    vmov.u16 r2, q0[1]
106; CHECK-NEXT:    vmov.16 q2[0], r0
107; CHECK-NEXT:    udiv r1, r2, r1
108; CHECK-NEXT:    vmov.u16 r0, q1[2]
109; CHECK-NEXT:    vmov.16 q2[1], r1
110; CHECK-NEXT:    vmov.u16 r1, q0[2]
111; CHECK-NEXT:    udiv r0, r1, r0
112; CHECK-NEXT:    vmov.u16 r1, q0[3]
113; CHECK-NEXT:    vmov.16 q2[2], r0
114; CHECK-NEXT:    vmov.u16 r0, q1[3]
115; CHECK-NEXT:    udiv r0, r1, r0
116; CHECK-NEXT:    vmov.u16 r1, q0[4]
117; CHECK-NEXT:    vmov.16 q2[3], r0
118; CHECK-NEXT:    vmov.u16 r0, q1[4]
119; CHECK-NEXT:    udiv r0, r1, r0
120; CHECK-NEXT:    vmov.u16 r1, q0[5]
121; CHECK-NEXT:    vmov.16 q2[4], r0
122; CHECK-NEXT:    vmov.u16 r0, q1[5]
123; CHECK-NEXT:    udiv r0, r1, r0
124; CHECK-NEXT:    vmov.u16 r1, q0[6]
125; CHECK-NEXT:    vmov.16 q2[5], r0
126; CHECK-NEXT:    vmov.u16 r0, q1[6]
127; CHECK-NEXT:    udiv r0, r1, r0
128; CHECK-NEXT:    vmov.u16 r1, q0[7]
129; CHECK-NEXT:    vmov.16 q2[6], r0
130; CHECK-NEXT:    vmov.u16 r0, q1[7]
131; CHECK-NEXT:    udiv r0, r1, r0
132; CHECK-NEXT:    vmov.16 q2[7], r0
133; CHECK-NEXT:    vmov q0, q2
134; CHECK-NEXT:    bx lr
135entry:
136  %out = udiv <8 x i16> %in1, %in2
137  ret <8 x i16> %out
138}
139
; Signed division of two <8 x i16> vectors.
; The expected output extracts each of the eight lanes with `vmov.s16`,
; divides with a scalar `sdiv`, inserts the quotient into q2 with
; `vmov.16`, and finally copies q2 to q0.
140define arm_aapcs_vfpcc <8 x i16> @sdiv_i16(<8 x i16> %in1, <8 x i16> %in2) {
141; CHECK-LABEL: sdiv_i16:
142; CHECK:       @ %bb.0: @ %entry
143; CHECK-NEXT:    vmov.s16 r0, q1[0]
144; CHECK-NEXT:    vmov.s16 r1, q0[0]
145; CHECK-NEXT:    sdiv r0, r1, r0
146; CHECK-NEXT:    vmov.s16 r1, q1[1]
147; CHECK-NEXT:    vmov.s16 r2, q0[1]
148; CHECK-NEXT:    vmov.16 q2[0], r0
149; CHECK-NEXT:    sdiv r1, r2, r1
150; CHECK-NEXT:    vmov.s16 r0, q1[2]
151; CHECK-NEXT:    vmov.16 q2[1], r1
152; CHECK-NEXT:    vmov.s16 r1, q0[2]
153; CHECK-NEXT:    sdiv r0, r1, r0
154; CHECK-NEXT:    vmov.s16 r1, q0[3]
155; CHECK-NEXT:    vmov.16 q2[2], r0
156; CHECK-NEXT:    vmov.s16 r0, q1[3]
157; CHECK-NEXT:    sdiv r0, r1, r0
158; CHECK-NEXT:    vmov.s16 r1, q0[4]
159; CHECK-NEXT:    vmov.16 q2[3], r0
160; CHECK-NEXT:    vmov.s16 r0, q1[4]
161; CHECK-NEXT:    sdiv r0, r1, r0
162; CHECK-NEXT:    vmov.s16 r1, q0[5]
163; CHECK-NEXT:    vmov.16 q2[4], r0
164; CHECK-NEXT:    vmov.s16 r0, q1[5]
165; CHECK-NEXT:    sdiv r0, r1, r0
166; CHECK-NEXT:    vmov.s16 r1, q0[6]
167; CHECK-NEXT:    vmov.16 q2[5], r0
168; CHECK-NEXT:    vmov.s16 r0, q1[6]
169; CHECK-NEXT:    sdiv r0, r1, r0
170; CHECK-NEXT:    vmov.s16 r1, q0[7]
171; CHECK-NEXT:    vmov.16 q2[6], r0
172; CHECK-NEXT:    vmov.s16 r0, q1[7]
173; CHECK-NEXT:    sdiv r0, r1, r0
174; CHECK-NEXT:    vmov.16 q2[7], r0
175; CHECK-NEXT:    vmov q0, q2
176; CHECK-NEXT:    bx lr
177entry:
178  %out = sdiv <8 x i16> %in1, %in2
179  ret <8 x i16> %out
180}
181
; Unsigned remainder of two <8 x i16> vectors.
; The expected output extracts each lane with `vmov.u16` and forms the
; remainder as a `udiv` followed by an `mls`. The eight results are held
; in core registers (r4-r7 and lr are saved for this) and inserted into
; q0 with `vmov.16`.
182define arm_aapcs_vfpcc <8 x i16> @urem_i16(<8 x i16> %in1, <8 x i16> %in2) {
183; CHECK-LABEL: urem_i16:
184; CHECK:       @ %bb.0: @ %entry
185; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
186; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
187; CHECK-NEXT:    vmov.u16 r0, q1[6]
188; CHECK-NEXT:    vmov.u16 r1, q0[6]
189; CHECK-NEXT:    udiv r2, r1, r0
190; CHECK-NEXT:    mls r12, r2, r0, r1
191; CHECK-NEXT:    vmov.u16 r1, q1[7]
192; CHECK-NEXT:    vmov.u16 r2, q0[7]
193; CHECK-NEXT:    udiv r3, r2, r1
194; CHECK-NEXT:    mls lr, r3, r1, r2
195; CHECK-NEXT:    vmov.u16 r2, q1[4]
196; CHECK-NEXT:    vmov.u16 r3, q0[4]
197; CHECK-NEXT:    udiv r0, r3, r2
198; CHECK-NEXT:    mls r2, r0, r2, r3
199; CHECK-NEXT:    vmov.u16 r0, q1[5]
200; CHECK-NEXT:    vmov.u16 r3, q0[5]
201; CHECK-NEXT:    udiv r1, r3, r0
202; CHECK-NEXT:    mls r0, r1, r0, r3
203; CHECK-NEXT:    vmov.u16 r1, q1[2]
204; CHECK-NEXT:    vmov.u16 r3, q0[2]
205; CHECK-NEXT:    udiv r4, r3, r1
206; CHECK-NEXT:    mls r1, r4, r1, r3
207; CHECK-NEXT:    vmov.u16 r3, q1[3]
208; CHECK-NEXT:    vmov.u16 r4, q0[3]
209; CHECK-NEXT:    udiv r5, r4, r3
210; CHECK-NEXT:    mls r3, r5, r3, r4
211; CHECK-NEXT:    vmov.u16 r4, q1[0]
212; CHECK-NEXT:    vmov.u16 r5, q0[0]
213; CHECK-NEXT:    udiv r6, r5, r4
214; CHECK-NEXT:    mls r4, r6, r4, r5
215; CHECK-NEXT:    vmov.u16 r6, q0[1]
216; CHECK-NEXT:    vmov.u16 r5, q1[1]
217; CHECK-NEXT:    udiv r7, r6, r5
218; CHECK-NEXT:    vmov.16 q0[0], r4
219; CHECK-NEXT:    mls r5, r7, r5, r6
220; CHECK-NEXT:    vmov.16 q0[1], r5
221; CHECK-NEXT:    vmov.16 q0[2], r1
222; CHECK-NEXT:    vmov.16 q0[3], r3
223; CHECK-NEXT:    vmov.16 q0[4], r2
224; CHECK-NEXT:    vmov.16 q0[5], r0
225; CHECK-NEXT:    vmov.16 q0[6], r12
226; CHECK-NEXT:    vmov.16 q0[7], lr
227; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
228entry:
229  %out = urem <8 x i16> %in1, %in2
230  ret <8 x i16> %out
231}
232
; Signed remainder of two <8 x i16> vectors.
; The expected output extracts each lane with `vmov.s16` and forms the
; remainder as an `sdiv` followed by an `mls`. The eight results are held
; in core registers (r4-r7 and lr are saved for this) and inserted into
; q0 with `vmov.16`.
233define arm_aapcs_vfpcc <8 x i16> @srem_i16(<8 x i16> %in1, <8 x i16> %in2) {
234; CHECK-LABEL: srem_i16:
235; CHECK:       @ %bb.0: @ %entry
236; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
237; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
238; CHECK-NEXT:    vmov.s16 r0, q1[6]
239; CHECK-NEXT:    vmov.s16 r1, q0[6]
240; CHECK-NEXT:    sdiv r2, r1, r0
241; CHECK-NEXT:    mls r12, r2, r0, r1
242; CHECK-NEXT:    vmov.s16 r1, q1[7]
243; CHECK-NEXT:    vmov.s16 r2, q0[7]
244; CHECK-NEXT:    sdiv r3, r2, r1
245; CHECK-NEXT:    mls lr, r3, r1, r2
246; CHECK-NEXT:    vmov.s16 r2, q1[4]
247; CHECK-NEXT:    vmov.s16 r3, q0[4]
248; CHECK-NEXT:    sdiv r0, r3, r2
249; CHECK-NEXT:    mls r2, r0, r2, r3
250; CHECK-NEXT:    vmov.s16 r0, q1[5]
251; CHECK-NEXT:    vmov.s16 r3, q0[5]
252; CHECK-NEXT:    sdiv r1, r3, r0
253; CHECK-NEXT:    mls r0, r1, r0, r3
254; CHECK-NEXT:    vmov.s16 r1, q1[2]
255; CHECK-NEXT:    vmov.s16 r3, q0[2]
256; CHECK-NEXT:    sdiv r4, r3, r1
257; CHECK-NEXT:    mls r1, r4, r1, r3
258; CHECK-NEXT:    vmov.s16 r3, q1[3]
259; CHECK-NEXT:    vmov.s16 r4, q0[3]
260; CHECK-NEXT:    sdiv r5, r4, r3
261; CHECK-NEXT:    mls r3, r5, r3, r4
262; CHECK-NEXT:    vmov.s16 r4, q1[0]
263; CHECK-NEXT:    vmov.s16 r5, q0[0]
264; CHECK-NEXT:    sdiv r6, r5, r4
265; CHECK-NEXT:    mls r4, r6, r4, r5
266; CHECK-NEXT:    vmov.s16 r6, q0[1]
267; CHECK-NEXT:    vmov.s16 r5, q1[1]
268; CHECK-NEXT:    sdiv r7, r6, r5
269; CHECK-NEXT:    vmov.16 q0[0], r4
270; CHECK-NEXT:    mls r5, r7, r5, r6
271; CHECK-NEXT:    vmov.16 q0[1], r5
272; CHECK-NEXT:    vmov.16 q0[2], r1
273; CHECK-NEXT:    vmov.16 q0[3], r3
274; CHECK-NEXT:    vmov.16 q0[4], r2
275; CHECK-NEXT:    vmov.16 q0[5], r0
276; CHECK-NEXT:    vmov.16 q0[6], r12
277; CHECK-NEXT:    vmov.16 q0[7], lr
278; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
279entry:
280  %out = srem <8 x i16> %in1, %in2
281  ret <8 x i16> %out
282}
283
284
; Unsigned division of two <16 x i8> vectors.
; The expected output extracts each of the sixteen lanes with `vmov.u8`,
; divides with a scalar `udiv`, inserts the quotient into q2 with
; `vmov.8`, and finally copies q2 to q0.
285define arm_aapcs_vfpcc <16 x i8> @udiv_i8(<16 x i8> %in1, <16 x i8> %in2) {
286; CHECK-LABEL: udiv_i8:
287; CHECK:       @ %bb.0: @ %entry
288; CHECK-NEXT:    vmov.u8 r0, q1[0]
289; CHECK-NEXT:    vmov.u8 r1, q0[0]
290; CHECK-NEXT:    udiv r0, r1, r0
291; CHECK-NEXT:    vmov.u8 r1, q1[1]
292; CHECK-NEXT:    vmov.u8 r2, q0[1]
293; CHECK-NEXT:    vmov.8 q2[0], r0
294; CHECK-NEXT:    udiv r1, r2, r1
295; CHECK-NEXT:    vmov.u8 r0, q1[2]
296; CHECK-NEXT:    vmov.8 q2[1], r1
297; CHECK-NEXT:    vmov.u8 r1, q0[2]
298; CHECK-NEXT:    udiv r0, r1, r0
299; CHECK-NEXT:    vmov.u8 r1, q0[3]
300; CHECK-NEXT:    vmov.8 q2[2], r0
301; CHECK-NEXT:    vmov.u8 r0, q1[3]
302; CHECK-NEXT:    udiv r0, r1, r0
303; CHECK-NEXT:    vmov.u8 r1, q0[4]
304; CHECK-NEXT:    vmov.8 q2[3], r0
305; CHECK-NEXT:    vmov.u8 r0, q1[4]
306; CHECK-NEXT:    udiv r0, r1, r0
307; CHECK-NEXT:    vmov.u8 r1, q0[5]
308; CHECK-NEXT:    vmov.8 q2[4], r0
309; CHECK-NEXT:    vmov.u8 r0, q1[5]
310; CHECK-NEXT:    udiv r0, r1, r0
311; CHECK-NEXT:    vmov.u8 r1, q0[6]
312; CHECK-NEXT:    vmov.8 q2[5], r0
313; CHECK-NEXT:    vmov.u8 r0, q1[6]
314; CHECK-NEXT:    udiv r0, r1, r0
315; CHECK-NEXT:    vmov.u8 r1, q0[7]
316; CHECK-NEXT:    vmov.8 q2[6], r0
317; CHECK-NEXT:    vmov.u8 r0, q1[7]
318; CHECK-NEXT:    udiv r0, r1, r0
319; CHECK-NEXT:    vmov.u8 r1, q0[8]
320; CHECK-NEXT:    vmov.8 q2[7], r0
321; CHECK-NEXT:    vmov.u8 r0, q1[8]
322; CHECK-NEXT:    udiv r0, r1, r0
323; CHECK-NEXT:    vmov.u8 r1, q0[9]
324; CHECK-NEXT:    vmov.8 q2[8], r0
325; CHECK-NEXT:    vmov.u8 r0, q1[9]
326; CHECK-NEXT:    udiv r0, r1, r0
327; CHECK-NEXT:    vmov.u8 r1, q0[10]
328; CHECK-NEXT:    vmov.8 q2[9], r0
329; CHECK-NEXT:    vmov.u8 r0, q1[10]
330; CHECK-NEXT:    udiv r0, r1, r0
331; CHECK-NEXT:    vmov.u8 r1, q0[11]
332; CHECK-NEXT:    vmov.8 q2[10], r0
333; CHECK-NEXT:    vmov.u8 r0, q1[11]
334; CHECK-NEXT:    udiv r0, r1, r0
335; CHECK-NEXT:    vmov.u8 r1, q0[12]
336; CHECK-NEXT:    vmov.8 q2[11], r0
337; CHECK-NEXT:    vmov.u8 r0, q1[12]
338; CHECK-NEXT:    udiv r0, r1, r0
339; CHECK-NEXT:    vmov.u8 r1, q0[13]
340; CHECK-NEXT:    vmov.8 q2[12], r0
341; CHECK-NEXT:    vmov.u8 r0, q1[13]
342; CHECK-NEXT:    udiv r0, r1, r0
343; CHECK-NEXT:    vmov.u8 r1, q0[14]
344; CHECK-NEXT:    vmov.8 q2[13], r0
345; CHECK-NEXT:    vmov.u8 r0, q1[14]
346; CHECK-NEXT:    udiv r0, r1, r0
347; CHECK-NEXT:    vmov.u8 r1, q0[15]
348; CHECK-NEXT:    vmov.8 q2[14], r0
349; CHECK-NEXT:    vmov.u8 r0, q1[15]
350; CHECK-NEXT:    udiv r0, r1, r0
351; CHECK-NEXT:    vmov.8 q2[15], r0
352; CHECK-NEXT:    vmov q0, q2
353; CHECK-NEXT:    bx lr
354entry:
355  %out = udiv <16 x i8> %in1, %in2
356  ret <16 x i8> %out
357}
358
; Signed division of two <16 x i8> vectors.
; The expected output extracts each of the sixteen lanes with `vmov.s8`,
; divides with a scalar `sdiv`, inserts the quotient into q2 with
; `vmov.8`, and finally copies q2 to q0.
359define arm_aapcs_vfpcc <16 x i8> @sdiv_i8(<16 x i8> %in1, <16 x i8> %in2) {
360; CHECK-LABEL: sdiv_i8:
361; CHECK:       @ %bb.0: @ %entry
362; CHECK-NEXT:    vmov.s8 r0, q1[0]
363; CHECK-NEXT:    vmov.s8 r1, q0[0]
364; CHECK-NEXT:    sdiv r0, r1, r0
365; CHECK-NEXT:    vmov.s8 r1, q1[1]
366; CHECK-NEXT:    vmov.s8 r2, q0[1]
367; CHECK-NEXT:    vmov.8 q2[0], r0
368; CHECK-NEXT:    sdiv r1, r2, r1
369; CHECK-NEXT:    vmov.s8 r0, q1[2]
370; CHECK-NEXT:    vmov.8 q2[1], r1
371; CHECK-NEXT:    vmov.s8 r1, q0[2]
372; CHECK-NEXT:    sdiv r0, r1, r0
373; CHECK-NEXT:    vmov.s8 r1, q0[3]
374; CHECK-NEXT:    vmov.8 q2[2], r0
375; CHECK-NEXT:    vmov.s8 r0, q1[3]
376; CHECK-NEXT:    sdiv r0, r1, r0
377; CHECK-NEXT:    vmov.s8 r1, q0[4]
378; CHECK-NEXT:    vmov.8 q2[3], r0
379; CHECK-NEXT:    vmov.s8 r0, q1[4]
380; CHECK-NEXT:    sdiv r0, r1, r0
381; CHECK-NEXT:    vmov.s8 r1, q0[5]
382; CHECK-NEXT:    vmov.8 q2[4], r0
383; CHECK-NEXT:    vmov.s8 r0, q1[5]
384; CHECK-NEXT:    sdiv r0, r1, r0
385; CHECK-NEXT:    vmov.s8 r1, q0[6]
386; CHECK-NEXT:    vmov.8 q2[5], r0
387; CHECK-NEXT:    vmov.s8 r0, q1[6]
388; CHECK-NEXT:    sdiv r0, r1, r0
389; CHECK-NEXT:    vmov.s8 r1, q0[7]
390; CHECK-NEXT:    vmov.8 q2[6], r0
391; CHECK-NEXT:    vmov.s8 r0, q1[7]
392; CHECK-NEXT:    sdiv r0, r1, r0
393; CHECK-NEXT:    vmov.s8 r1, q0[8]
394; CHECK-NEXT:    vmov.8 q2[7], r0
395; CHECK-NEXT:    vmov.s8 r0, q1[8]
396; CHECK-NEXT:    sdiv r0, r1, r0
397; CHECK-NEXT:    vmov.s8 r1, q0[9]
398; CHECK-NEXT:    vmov.8 q2[8], r0
399; CHECK-NEXT:    vmov.s8 r0, q1[9]
400; CHECK-NEXT:    sdiv r0, r1, r0
401; CHECK-NEXT:    vmov.s8 r1, q0[10]
402; CHECK-NEXT:    vmov.8 q2[9], r0
403; CHECK-NEXT:    vmov.s8 r0, q1[10]
404; CHECK-NEXT:    sdiv r0, r1, r0
405; CHECK-NEXT:    vmov.s8 r1, q0[11]
406; CHECK-NEXT:    vmov.8 q2[10], r0
407; CHECK-NEXT:    vmov.s8 r0, q1[11]
408; CHECK-NEXT:    sdiv r0, r1, r0
409; CHECK-NEXT:    vmov.s8 r1, q0[12]
410; CHECK-NEXT:    vmov.8 q2[11], r0
411; CHECK-NEXT:    vmov.s8 r0, q1[12]
412; CHECK-NEXT:    sdiv r0, r1, r0
413; CHECK-NEXT:    vmov.s8 r1, q0[13]
414; CHECK-NEXT:    vmov.8 q2[12], r0
415; CHECK-NEXT:    vmov.s8 r0, q1[13]
416; CHECK-NEXT:    sdiv r0, r1, r0
417; CHECK-NEXT:    vmov.s8 r1, q0[14]
418; CHECK-NEXT:    vmov.8 q2[13], r0
419; CHECK-NEXT:    vmov.s8 r0, q1[14]
420; CHECK-NEXT:    sdiv r0, r1, r0
421; CHECK-NEXT:    vmov.s8 r1, q0[15]
422; CHECK-NEXT:    vmov.8 q2[14], r0
423; CHECK-NEXT:    vmov.s8 r0, q1[15]
424; CHECK-NEXT:    sdiv r0, r1, r0
425; CHECK-NEXT:    vmov.8 q2[15], r0
426; CHECK-NEXT:    vmov q0, q2
427; CHECK-NEXT:    bx lr
428entry:
429  %out = sdiv <16 x i8> %in1, %in2
430  ret <16 x i8> %out
431}
432
; Unsigned remainder of two <16 x i8> vectors.
; The expected output extracts each lane with `vmov.u8` and forms the
; remainder as a `udiv` followed by an `mls`. Results are inserted into
; q2 with `vmov.8` (some are held in core registers first, with r4-r8
; and lr saved), and q2 is copied to q0 at the end.
433define arm_aapcs_vfpcc <16 x i8> @urem_i8(<16 x i8> %in1, <16 x i8> %in2) {
434; CHECK-LABEL: urem_i8:
435; CHECK:       @ %bb.0: @ %entry
436; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
437; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
438; CHECK-NEXT:    vmov.u8 r0, q1[14]
439; CHECK-NEXT:    vmov.u8 r1, q0[14]
440; CHECK-NEXT:    udiv r2, r1, r0
441; CHECK-NEXT:    mls r12, r2, r0, r1
442; CHECK-NEXT:    vmov.u8 r0, q1[15]
443; CHECK-NEXT:    vmov.u8 r1, q0[15]
444; CHECK-NEXT:    udiv r2, r1, r0
445; CHECK-NEXT:    mls lr, r2, r0, r1
446; CHECK-NEXT:    vmov.u8 r0, q1[12]
447; CHECK-NEXT:    vmov.u8 r1, q0[12]
448; CHECK-NEXT:    udiv r2, r1, r0
449; CHECK-NEXT:    mls r8, r2, r0, r1
450; CHECK-NEXT:    vmov.u8 r0, q1[13]
451; CHECK-NEXT:    vmov.u8 r1, q0[13]
452; CHECK-NEXT:    udiv r3, r1, r0
453; CHECK-NEXT:    mls r3, r3, r0, r1
454; CHECK-NEXT:    vmov.u8 r0, q1[10]
455; CHECK-NEXT:    vmov.u8 r1, q0[10]
456; CHECK-NEXT:    udiv r4, r1, r0
457; CHECK-NEXT:    mls r0, r4, r0, r1
458; CHECK-NEXT:    vmov.u8 r1, q1[11]
459; CHECK-NEXT:    vmov.u8 r4, q0[11]
460; CHECK-NEXT:    udiv r5, r4, r1
461; CHECK-NEXT:    mls r1, r5, r1, r4
462; CHECK-NEXT:    vmov.u8 r4, q1[8]
463; CHECK-NEXT:    vmov.u8 r5, q0[8]
464; CHECK-NEXT:    udiv r6, r5, r4
465; CHECK-NEXT:    mls r4, r6, r4, r5
466; CHECK-NEXT:    vmov.u8 r5, q1[0]
467; CHECK-NEXT:    vmov.u8 r6, q0[0]
468; CHECK-NEXT:    udiv r7, r6, r5
469; CHECK-NEXT:    mls r5, r7, r5, r6
470; CHECK-NEXT:    vmov.u8 r6, q1[1]
471; CHECK-NEXT:    vmov.u8 r7, q0[1]
472; CHECK-NEXT:    udiv r2, r7, r6
473; CHECK-NEXT:    vmov.8 q2[0], r5
474; CHECK-NEXT:    mls r2, r2, r6, r7
475; CHECK-NEXT:    vmov.u8 r5, q0[2]
476; CHECK-NEXT:    vmov.8 q2[1], r2
477; CHECK-NEXT:    vmov.u8 r2, q1[2]
478; CHECK-NEXT:    udiv r6, r5, r2
479; CHECK-NEXT:    mls r2, r6, r2, r5
480; CHECK-NEXT:    vmov.u8 r5, q0[3]
481; CHECK-NEXT:    vmov.8 q2[2], r2
482; CHECK-NEXT:    vmov.u8 r2, q1[3]
483; CHECK-NEXT:    udiv r6, r5, r2
484; CHECK-NEXT:    mls r2, r6, r2, r5
485; CHECK-NEXT:    vmov.u8 r5, q0[4]
486; CHECK-NEXT:    vmov.8 q2[3], r2
487; CHECK-NEXT:    vmov.u8 r2, q1[4]
488; CHECK-NEXT:    udiv r6, r5, r2
489; CHECK-NEXT:    mls r2, r6, r2, r5
490; CHECK-NEXT:    vmov.u8 r5, q0[5]
491; CHECK-NEXT:    vmov.8 q2[4], r2
492; CHECK-NEXT:    vmov.u8 r2, q1[5]
493; CHECK-NEXT:    udiv r6, r5, r2
494; CHECK-NEXT:    mls r2, r6, r2, r5
495; CHECK-NEXT:    vmov.u8 r5, q0[6]
496; CHECK-NEXT:    vmov.8 q2[5], r2
497; CHECK-NEXT:    vmov.u8 r2, q1[6]
498; CHECK-NEXT:    udiv r6, r5, r2
499; CHECK-NEXT:    mls r2, r6, r2, r5
500; CHECK-NEXT:    vmov.u8 r5, q0[7]
501; CHECK-NEXT:    vmov.8 q2[6], r2
502; CHECK-NEXT:    vmov.u8 r2, q1[7]
503; CHECK-NEXT:    udiv r6, r5, r2
504; CHECK-NEXT:    mls r2, r6, r2, r5
505; CHECK-NEXT:    vmov.u8 r5, q0[9]
506; CHECK-NEXT:    vmov.8 q2[7], r2
507; CHECK-NEXT:    vmov.u8 r2, q1[9]
508; CHECK-NEXT:    udiv r6, r5, r2
509; CHECK-NEXT:    vmov.8 q2[8], r4
510; CHECK-NEXT:    mls r2, r6, r2, r5
511; CHECK-NEXT:    vmov.8 q2[9], r2
512; CHECK-NEXT:    vmov.8 q2[10], r0
513; CHECK-NEXT:    vmov.8 q2[11], r1
514; CHECK-NEXT:    vmov.8 q2[12], r8
515; CHECK-NEXT:    vmov.8 q2[13], r3
516; CHECK-NEXT:    vmov.8 q2[14], r12
517; CHECK-NEXT:    vmov.8 q2[15], lr
518; CHECK-NEXT:    vmov q0, q2
519; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
520entry:
521  %out = urem <16 x i8> %in1, %in2
522  ret <16 x i8> %out
523}
524
; Signed remainder of two <16 x i8> vectors.
; The expected output extracts each lane with `vmov.s8` and forms the
; remainder as an `sdiv` followed by an `mls`. Results are inserted into
; q2 with `vmov.8` (some are held in core registers first, with r4-r8
; and lr saved), and q2 is copied to q0 at the end.
525define arm_aapcs_vfpcc <16 x i8> @srem_i8(<16 x i8> %in1, <16 x i8> %in2) {
526; CHECK-LABEL: srem_i8:
527; CHECK:       @ %bb.0: @ %entry
528; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
529; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
530; CHECK-NEXT:    vmov.s8 r0, q1[14]
531; CHECK-NEXT:    vmov.s8 r1, q0[14]
532; CHECK-NEXT:    sdiv r2, r1, r0
533; CHECK-NEXT:    mls r12, r2, r0, r1
534; CHECK-NEXT:    vmov.s8 r0, q1[15]
535; CHECK-NEXT:    vmov.s8 r1, q0[15]
536; CHECK-NEXT:    sdiv r2, r1, r0
537; CHECK-NEXT:    mls lr, r2, r0, r1
538; CHECK-NEXT:    vmov.s8 r0, q1[12]
539; CHECK-NEXT:    vmov.s8 r1, q0[12]
540; CHECK-NEXT:    sdiv r2, r1, r0
541; CHECK-NEXT:    mls r8, r2, r0, r1
542; CHECK-NEXT:    vmov.s8 r0, q1[13]
543; CHECK-NEXT:    vmov.s8 r1, q0[13]
544; CHECK-NEXT:    sdiv r3, r1, r0
545; CHECK-NEXT:    mls r3, r3, r0, r1
546; CHECK-NEXT:    vmov.s8 r0, q1[10]
547; CHECK-NEXT:    vmov.s8 r1, q0[10]
548; CHECK-NEXT:    sdiv r4, r1, r0
549; CHECK-NEXT:    mls r0, r4, r0, r1
550; CHECK-NEXT:    vmov.s8 r1, q1[11]
551; CHECK-NEXT:    vmov.s8 r4, q0[11]
552; CHECK-NEXT:    sdiv r5, r4, r1
553; CHECK-NEXT:    mls r1, r5, r1, r4
554; CHECK-NEXT:    vmov.s8 r4, q1[8]
555; CHECK-NEXT:    vmov.s8 r5, q0[8]
556; CHECK-NEXT:    sdiv r6, r5, r4
557; CHECK-NEXT:    mls r4, r6, r4, r5
558; CHECK-NEXT:    vmov.s8 r5, q1[0]
559; CHECK-NEXT:    vmov.s8 r6, q0[0]
560; CHECK-NEXT:    sdiv r7, r6, r5
561; CHECK-NEXT:    mls r5, r7, r5, r6
562; CHECK-NEXT:    vmov.s8 r6, q1[1]
563; CHECK-NEXT:    vmov.s8 r7, q0[1]
564; CHECK-NEXT:    sdiv r2, r7, r6
565; CHECK-NEXT:    vmov.8 q2[0], r5
566; CHECK-NEXT:    mls r2, r2, r6, r7
567; CHECK-NEXT:    vmov.s8 r5, q0[2]
568; CHECK-NEXT:    vmov.8 q2[1], r2
569; CHECK-NEXT:    vmov.s8 r2, q1[2]
570; CHECK-NEXT:    sdiv r6, r5, r2
571; CHECK-NEXT:    mls r2, r6, r2, r5
572; CHECK-NEXT:    vmov.s8 r5, q0[3]
573; CHECK-NEXT:    vmov.8 q2[2], r2
574; CHECK-NEXT:    vmov.s8 r2, q1[3]
575; CHECK-NEXT:    sdiv r6, r5, r2
576; CHECK-NEXT:    mls r2, r6, r2, r5
577; CHECK-NEXT:    vmov.s8 r5, q0[4]
578; CHECK-NEXT:    vmov.8 q2[3], r2
579; CHECK-NEXT:    vmov.s8 r2, q1[4]
580; CHECK-NEXT:    sdiv r6, r5, r2
581; CHECK-NEXT:    mls r2, r6, r2, r5
582; CHECK-NEXT:    vmov.s8 r5, q0[5]
583; CHECK-NEXT:    vmov.8 q2[4], r2
584; CHECK-NEXT:    vmov.s8 r2, q1[5]
585; CHECK-NEXT:    sdiv r6, r5, r2
586; CHECK-NEXT:    mls r2, r6, r2, r5
587; CHECK-NEXT:    vmov.s8 r5, q0[6]
588; CHECK-NEXT:    vmov.8 q2[5], r2
589; CHECK-NEXT:    vmov.s8 r2, q1[6]
590; CHECK-NEXT:    sdiv r6, r5, r2
591; CHECK-NEXT:    mls r2, r6, r2, r5
592; CHECK-NEXT:    vmov.s8 r5, q0[7]
593; CHECK-NEXT:    vmov.8 q2[6], r2
594; CHECK-NEXT:    vmov.s8 r2, q1[7]
595; CHECK-NEXT:    sdiv r6, r5, r2
596; CHECK-NEXT:    mls r2, r6, r2, r5
597; CHECK-NEXT:    vmov.s8 r5, q0[9]
598; CHECK-NEXT:    vmov.8 q2[7], r2
599; CHECK-NEXT:    vmov.s8 r2, q1[9]
600; CHECK-NEXT:    sdiv r6, r5, r2
601; CHECK-NEXT:    vmov.8 q2[8], r4
602; CHECK-NEXT:    mls r2, r6, r2, r5
603; CHECK-NEXT:    vmov.8 q2[9], r2
604; CHECK-NEXT:    vmov.8 q2[10], r0
605; CHECK-NEXT:    vmov.8 q2[11], r1
606; CHECK-NEXT:    vmov.8 q2[12], r8
607; CHECK-NEXT:    vmov.8 q2[13], r3
608; CHECK-NEXT:    vmov.8 q2[14], r12
609; CHECK-NEXT:    vmov.8 q2[15], lr
610; CHECK-NEXT:    vmov q0, q2
611; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
612entry:
613  %out = srem <16 x i8> %in1, %in2
614  ret <16 x i8> %out
615}
616
; Unsigned division of two <2 x i64> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `__aeabi_uldivmod`, one per lane. The result vector is assembled
; from the r0/r1 pair left by each call.
617define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
618; CHECK-LABEL: udiv_i64:
619; CHECK:       @ %bb.0: @ %entry
620; CHECK-NEXT:    .save {r4, r5, r7, lr}
621; CHECK-NEXT:    push {r4, r5, r7, lr}
622; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
623; CHECK-NEXT:    vpush {d8, d9, d10, d11}
624; CHECK-NEXT:    vmov q4, q1
625; CHECK-NEXT:    vmov q5, q0
626; CHECK-NEXT:    vmov r0, r1, d11
627; CHECK-NEXT:    vmov r2, r3, d9
628; CHECK-NEXT:    bl __aeabi_uldivmod
629; CHECK-NEXT:    mov r4, r0
630; CHECK-NEXT:    mov r5, r1
631; CHECK-NEXT:    vmov r0, r1, d10
632; CHECK-NEXT:    vmov r2, r3, d8
633; CHECK-NEXT:    bl __aeabi_uldivmod
634; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
635; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
636; CHECK-NEXT:    vpop {d8, d9, d10, d11}
637; CHECK-NEXT:    pop {r4, r5, r7, pc}
638entry:
639  %out = udiv <2 x i64> %in1, %in2
640  ret <2 x i64> %out
641}
642
; Signed division of two <2 x i64> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `__aeabi_ldivmod`, one per lane. The result vector is assembled
; from the r0/r1 pair left by each call.
643define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) {
644; CHECK-LABEL: sdiv_i64:
645; CHECK:       @ %bb.0: @ %entry
646; CHECK-NEXT:    .save {r4, r5, r7, lr}
647; CHECK-NEXT:    push {r4, r5, r7, lr}
648; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
649; CHECK-NEXT:    vpush {d8, d9, d10, d11}
650; CHECK-NEXT:    vmov q4, q1
651; CHECK-NEXT:    vmov q5, q0
652; CHECK-NEXT:    vmov r0, r1, d11
653; CHECK-NEXT:    vmov r2, r3, d9
654; CHECK-NEXT:    bl __aeabi_ldivmod
655; CHECK-NEXT:    mov r4, r0
656; CHECK-NEXT:    mov r5, r1
657; CHECK-NEXT:    vmov r0, r1, d10
658; CHECK-NEXT:    vmov r2, r3, d8
659; CHECK-NEXT:    bl __aeabi_ldivmod
660; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
661; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
662; CHECK-NEXT:    vpop {d8, d9, d10, d11}
663; CHECK-NEXT:    pop {r4, r5, r7, pc}
664entry:
665  %out = sdiv <2 x i64> %in1, %in2
666  ret <2 x i64> %out
667}
668
; Unsigned remainder of two <2 x i64> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `__aeabi_uldivmod`, one per lane. Unlike the division test, the
; result vector is assembled from the r2/r3 pair left by each call.
669define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) {
670; CHECK-LABEL: urem_i64:
671; CHECK:       @ %bb.0: @ %entry
672; CHECK-NEXT:    .save {r4, r5, r7, lr}
673; CHECK-NEXT:    push {r4, r5, r7, lr}
674; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
675; CHECK-NEXT:    vpush {d8, d9, d10, d11}
676; CHECK-NEXT:    vmov q4, q1
677; CHECK-NEXT:    vmov q5, q0
678; CHECK-NEXT:    vmov r0, r1, d11
679; CHECK-NEXT:    vmov r2, r3, d9
680; CHECK-NEXT:    bl __aeabi_uldivmod
681; CHECK-NEXT:    mov r4, r2
682; CHECK-NEXT:    mov r5, r3
683; CHECK-NEXT:    vmov r0, r1, d10
684; CHECK-NEXT:    vmov r2, r3, d8
685; CHECK-NEXT:    bl __aeabi_uldivmod
686; CHECK-NEXT:    vmov q0[2], q0[0], r2, r4
687; CHECK-NEXT:    vmov q0[3], q0[1], r3, r5
688; CHECK-NEXT:    vpop {d8, d9, d10, d11}
689; CHECK-NEXT:    pop {r4, r5, r7, pc}
690entry:
691  %out = urem <2 x i64> %in1, %in2
692  ret <2 x i64> %out
693}
694
; Signed remainder of two <2 x i64> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `__aeabi_ldivmod`, one per lane. Unlike the division test, the
; result vector is assembled from the r2/r3 pair left by each call.
695define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) {
696; CHECK-LABEL: srem_i64:
697; CHECK:       @ %bb.0: @ %entry
698; CHECK-NEXT:    .save {r4, r5, r7, lr}
699; CHECK-NEXT:    push {r4, r5, r7, lr}
700; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
701; CHECK-NEXT:    vpush {d8, d9, d10, d11}
702; CHECK-NEXT:    vmov q4, q1
703; CHECK-NEXT:    vmov q5, q0
704; CHECK-NEXT:    vmov r0, r1, d11
705; CHECK-NEXT:    vmov r2, r3, d9
706; CHECK-NEXT:    bl __aeabi_ldivmod
707; CHECK-NEXT:    mov r4, r2
708; CHECK-NEXT:    mov r5, r3
709; CHECK-NEXT:    vmov r0, r1, d10
710; CHECK-NEXT:    vmov r2, r3, d8
711; CHECK-NEXT:    bl __aeabi_ldivmod
712; CHECK-NEXT:    vmov q0[2], q0[0], r2, r4
713; CHECK-NEXT:    vmov q0[3], q0[1], r3, r5
714; CHECK-NEXT:    vpop {d8, d9, d10, d11}
715; CHECK-NEXT:    pop {r4, r5, r7, pc}
716entry:
717  %out = srem <2 x i64> %in1, %in2
718  ret <2 x i64> %out
719}
720
721
722
723
; Floating-point division of two <4 x float> vectors.
; The expected output is four scalar `vdiv.f32` instructions operating in
; place on s0-s3 (dividend lanes) with s4-s7 as divisors; no call and no
; stack frame.
724define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) {
725; CHECK-LABEL: fdiv_f32:
726; CHECK:       @ %bb.0: @ %entry
727; CHECK-NEXT:    vdiv.f32 s3, s3, s7
728; CHECK-NEXT:    vdiv.f32 s2, s2, s6
729; CHECK-NEXT:    vdiv.f32 s1, s1, s5
730; CHECK-NEXT:    vdiv.f32 s0, s0, s4
731; CHECK-NEXT:    bx lr
732entry:
733  %out = fdiv <4 x float> %in1, %in2
734  ret <4 x float> %out
735}
736
; Floating-point remainder of two <4 x float> vectors.
; The expected output preserves the operands in q4/q5 and makes four
; calls to `fmodf`, one per lane, passing operands in core registers.
; Results are written into s16-s19 and q4 is copied to q0 at the end.
737define arm_aapcs_vfpcc <4 x float> @frem_f32(<4 x float> %in1, <4 x float> %in2) {
738; CHECK-LABEL: frem_f32:
739; CHECK:       @ %bb.0: @ %entry
740; CHECK-NEXT:    .save {r4, r5, r6, lr}
741; CHECK-NEXT:    push {r4, r5, r6, lr}
742; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
743; CHECK-NEXT:    vpush {d8, d9, d10, d11}
744; CHECK-NEXT:    vmov q4, q1
745; CHECK-NEXT:    vmov q5, q0
746; CHECK-NEXT:    vmov r0, r4, d11
747; CHECK-NEXT:    vmov r1, r5, d9
748; CHECK-NEXT:    bl fmodf
749; CHECK-NEXT:    mov r6, r0
750; CHECK-NEXT:    mov r0, r4
751; CHECK-NEXT:    mov r1, r5
752; CHECK-NEXT:    bl fmodf
753; CHECK-NEXT:    vmov r4, r2, d10
754; CHECK-NEXT:    vmov r5, r1, d8
755; CHECK-NEXT:    vmov s19, r0
756; CHECK-NEXT:    vmov s18, r6
757; CHECK-NEXT:    mov r0, r2
758; CHECK-NEXT:    bl fmodf
759; CHECK-NEXT:    vmov s17, r0
760; CHECK-NEXT:    mov r0, r4
761; CHECK-NEXT:    mov r1, r5
762; CHECK-NEXT:    bl fmodf
763; CHECK-NEXT:    vmov s16, r0
764; CHECK-NEXT:    vmov q0, q4
765; CHECK-NEXT:    vpop {d8, d9, d10, d11}
766; CHECK-NEXT:    pop {r4, r5, r6, pc}
767entry:
768  %out = frem <4 x float> %in1, %in2
769  ret <4 x float> %out
770}
771
772
; Floating-point division of two <8 x half> vectors.
; The expected output is eight scalar `vdiv.f16` instructions. For each
; s-register pair, `vmovx.f16` extracts the top halves, both halves are
; divided, and `vins.f16` puts the top result back.
773define arm_aapcs_vfpcc <8 x half> @fdiv_f16(<8 x half> %in1, <8 x half> %in2) {
774; CHECK-LABEL: fdiv_f16:
775; CHECK:       @ %bb.0: @ %entry
776; CHECK-NEXT:    vmovx.f16 s10, s0
777; CHECK-NEXT:    vmovx.f16 s8, s4
778; CHECK-NEXT:    vdiv.f16 s0, s0, s4
779; CHECK-NEXT:    vdiv.f16 s8, s10, s8
780; CHECK-NEXT:    vins.f16 s0, s8
781; CHECK-NEXT:    vmovx.f16 s8, s1
782; CHECK-NEXT:    vmovx.f16 s4, s5
783; CHECK-NEXT:    vdiv.f16 s1, s1, s5
784; CHECK-NEXT:    vdiv.f16 s4, s8, s4
785; CHECK-NEXT:    vmovx.f16 s8, s2
786; CHECK-NEXT:    vins.f16 s1, s4
787; CHECK-NEXT:    vmovx.f16 s4, s6
788; CHECK-NEXT:    vdiv.f16 s2, s2, s6
789; CHECK-NEXT:    vdiv.f16 s4, s8, s4
790; CHECK-NEXT:    vins.f16 s2, s4
791; CHECK-NEXT:    vmovx.f16 s6, s3
792; CHECK-NEXT:    vmovx.f16 s4, s7
793; CHECK-NEXT:    vdiv.f16 s3, s3, s7
794; CHECK-NEXT:    vdiv.f16 s4, s6, s4
795; CHECK-NEXT:    vins.f16 s3, s4
796; CHECK-NEXT:    bx lr
797entry:
798  %out = fdiv <8 x half> %in1, %in2
799  ret <8 x half> %out
800}
801
; Floating-point remainder of two <8 x half> vectors.
; The expected output makes eight calls to `fmodf`, one per lane. Each
; half is widened with `vcvtb.f32.f16` / `vcvtt.f32.f16` before the call
; and the result is narrowed back with `vcvtb.f16.f32` / `vcvtt.f16.f32`
; into q6 (s24-s27), which is copied to q0 at the end.
802define arm_aapcs_vfpcc <8 x half> @frem_f16(<8 x half> %in1, <8 x half> %in2) {
803; CHECK-LABEL: frem_f16:
804; CHECK:       @ %bb.0: @ %entry
805; CHECK-NEXT:    .save {r7, lr}
806; CHECK-NEXT:    push {r7, lr}
807; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
808; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
809; CHECK-NEXT:    vmov q5, q0
810; CHECK-NEXT:    vmov q4, q1
811; CHECK-NEXT:    vcvtb.f32.f16 s0, s20
812; CHECK-NEXT:    vmov r0, s0
813; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
814; CHECK-NEXT:    vmov r1, s0
815; CHECK-NEXT:    bl fmodf
816; CHECK-NEXT:    vcvtt.f32.f16 s0, s20
817; CHECK-NEXT:    vmov r2, s0
818; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
819; CHECK-NEXT:    vmov r1, s0
820; CHECK-NEXT:    vmov s16, r0
821; CHECK-NEXT:    mov r0, r2
822; CHECK-NEXT:    bl fmodf
823; CHECK-NEXT:    vmov s0, r0
824; CHECK-NEXT:    vcvtb.f16.f32 s24, s16
825; CHECK-NEXT:    vcvtt.f16.f32 s24, s0
826; CHECK-NEXT:    vcvtb.f32.f16 s0, s21
827; CHECK-NEXT:    vmov r0, s0
828; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
829; CHECK-NEXT:    vmov r1, s0
830; CHECK-NEXT:    bl fmodf
831; CHECK-NEXT:    vmov s0, r0
832; CHECK-NEXT:    vcvtb.f16.f32 s25, s0
833; CHECK-NEXT:    vcvtt.f32.f16 s0, s21
834; CHECK-NEXT:    vmov r0, s0
835; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
836; CHECK-NEXT:    vmov r1, s0
837; CHECK-NEXT:    bl fmodf
838; CHECK-NEXT:    vmov s0, r0
839; CHECK-NEXT:    vcvtt.f16.f32 s25, s0
840; CHECK-NEXT:    vcvtb.f32.f16 s0, s22
841; CHECK-NEXT:    vmov r0, s0
842; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
843; CHECK-NEXT:    vmov r1, s0
844; CHECK-NEXT:    bl fmodf
845; CHECK-NEXT:    vmov s0, r0
846; CHECK-NEXT:    vcvtb.f16.f32 s26, s0
847; CHECK-NEXT:    vcvtt.f32.f16 s0, s22
848; CHECK-NEXT:    vmov r0, s0
849; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
850; CHECK-NEXT:    vmov r1, s0
851; CHECK-NEXT:    bl fmodf
852; CHECK-NEXT:    vmov s0, r0
853; CHECK-NEXT:    vcvtt.f16.f32 s26, s0
854; CHECK-NEXT:    vcvtb.f32.f16 s0, s23
855; CHECK-NEXT:    vmov r0, s0
856; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
857; CHECK-NEXT:    vmov r1, s0
858; CHECK-NEXT:    bl fmodf
859; CHECK-NEXT:    vmov s0, r0
860; CHECK-NEXT:    vcvtb.f16.f32 s27, s0
861; CHECK-NEXT:    vcvtt.f32.f16 s0, s23
862; CHECK-NEXT:    vmov r0, s0
863; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
864; CHECK-NEXT:    vmov r1, s0
865; CHECK-NEXT:    bl fmodf
866; CHECK-NEXT:    vmov s0, r0
867; CHECK-NEXT:    vcvtt.f16.f32 s27, s0
868; CHECK-NEXT:    vmov q0, q6
869; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
870; CHECK-NEXT:    pop {r7, pc}
871entry:
872  %out = frem <8 x half> %in1, %in2
873  ret <8 x half> %out
874}
875
; Floating-point division of two <2 x double> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `__aeabi_ddiv`, one per lane. Results are moved from r0/r1 into d9
; and d8, and q4 is copied to q0 at the end.
876define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) {
877; CHECK-LABEL: fdiv_f64:
878; CHECK:       @ %bb.0: @ %entry
879; CHECK-NEXT:    .save {r7, lr}
880; CHECK-NEXT:    push {r7, lr}
881; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
882; CHECK-NEXT:    vpush {d8, d9, d10, d11}
883; CHECK-NEXT:    vmov q4, q1
884; CHECK-NEXT:    vmov q5, q0
885; CHECK-NEXT:    vmov r0, r1, d11
886; CHECK-NEXT:    vmov r2, r3, d9
887; CHECK-NEXT:    bl __aeabi_ddiv
888; CHECK-NEXT:    vmov lr, r12, d10
889; CHECK-NEXT:    vmov r2, r3, d8
890; CHECK-NEXT:    vmov d9, r0, r1
891; CHECK-NEXT:    mov r0, lr
892; CHECK-NEXT:    mov r1, r12
893; CHECK-NEXT:    bl __aeabi_ddiv
894; CHECK-NEXT:    vmov d8, r0, r1
895; CHECK-NEXT:    vmov q0, q4
896; CHECK-NEXT:    vpop {d8, d9, d10, d11}
897; CHECK-NEXT:    pop {r7, pc}
898entry:
899  %out = fdiv <2 x double> %in1, %in2
900  ret <2 x double> %out
901}
902
; Floating-point remainder of two <2 x double> vectors.
; The expected output preserves the operands in q4/q5 and makes two calls
; to `fmod`, one per lane. Results are moved from r0/r1 into d9 and d8,
; and q4 is copied to q0 at the end.
903define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) {
904; CHECK-LABEL: frem_f64:
905; CHECK:       @ %bb.0: @ %entry
906; CHECK-NEXT:    .save {r7, lr}
907; CHECK-NEXT:    push {r7, lr}
908; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
909; CHECK-NEXT:    vpush {d8, d9, d10, d11}
910; CHECK-NEXT:    vmov q4, q1
911; CHECK-NEXT:    vmov q5, q0
912; CHECK-NEXT:    vmov r0, r1, d11
913; CHECK-NEXT:    vmov r2, r3, d9
914; CHECK-NEXT:    bl fmod
915; CHECK-NEXT:    vmov lr, r12, d10
916; CHECK-NEXT:    vmov r2, r3, d8
917; CHECK-NEXT:    vmov d9, r0, r1
918; CHECK-NEXT:    mov r0, lr
919; CHECK-NEXT:    mov r1, r12
920; CHECK-NEXT:    bl fmod
921; CHECK-NEXT:    vmov d8, r0, r1
922; CHECK-NEXT:    vmov q0, q4
923; CHECK-NEXT:    vpop {d8, d9, d10, d11}
924; CHECK-NEXT:    pop {r7, pc}
925entry:
926  %out = frem <2 x double> %in1, %in2
927  ret <2 x double> %out
928}
929
930
931