xref: /llvm-project/llvm/test/CodeGen/AArch64/rem.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
; Scalar signed remainder of two i8 values.
; Expected lowering for both instruction selectors: sign-extend the operands
; with sxtb, divide with sdiv, then form a - (a / b) * b with msub.
; The GlobalISel sequence passes the unextended w1/w0 to msub, whereas the
; SelectionDAG sequence reuses the extended copies.
5define i8 @si8(i8 %a, i8 %b) {
6; CHECK-SD-LABEL: si8:
7; CHECK-SD:       // %bb.0: // %entry
8; CHECK-SD-NEXT:    sxtb w8, w1
9; CHECK-SD-NEXT:    sxtb w9, w0
10; CHECK-SD-NEXT:    sdiv w10, w9, w8
11; CHECK-SD-NEXT:    msub w0, w10, w8, w9
12; CHECK-SD-NEXT:    ret
13;
14; CHECK-GI-LABEL: si8:
15; CHECK-GI:       // %bb.0: // %entry
16; CHECK-GI-NEXT:    sxtb w8, w0
17; CHECK-GI-NEXT:    sxtb w9, w1
18; CHECK-GI-NEXT:    sdiv w8, w8, w9
19; CHECK-GI-NEXT:    msub w0, w8, w1, w0
20; CHECK-GI-NEXT:    ret
21entry:
22  %s = srem i8 %a, %b
23  ret i8 %s
24}
25
; Scalar unsigned remainder of two i8 values.
; Expected lowering for both instruction selectors: zero-extend the operands
; by masking with #0xff, divide with udiv, then form a - (a / b) * b with msub.
; The GlobalISel sequence passes the unmasked w1/w0 to msub, whereas the
; SelectionDAG sequence reuses the masked copies.
26define i8 @ui8(i8 %a, i8 %b) {
27; CHECK-SD-LABEL: ui8:
28; CHECK-SD:       // %bb.0: // %entry
29; CHECK-SD-NEXT:    and w8, w1, #0xff
30; CHECK-SD-NEXT:    and w9, w0, #0xff
31; CHECK-SD-NEXT:    udiv w10, w9, w8
32; CHECK-SD-NEXT:    msub w0, w10, w8, w9
33; CHECK-SD-NEXT:    ret
34;
35; CHECK-GI-LABEL: ui8:
36; CHECK-GI:       // %bb.0: // %entry
37; CHECK-GI-NEXT:    and w8, w0, #0xff
38; CHECK-GI-NEXT:    and w9, w1, #0xff
39; CHECK-GI-NEXT:    udiv w8, w8, w9
40; CHECK-GI-NEXT:    msub w0, w8, w1, w0
41; CHECK-GI-NEXT:    ret
42entry:
43  %s = urem i8 %a, %b
44  ret i8 %s
45}
46
; Scalar signed remainder of two i16 values.
; Expected lowering for both instruction selectors: sign-extend the operands
; with sxth, divide with sdiv, then form a - (a / b) * b with msub.
; The GlobalISel sequence passes the unextended w1/w0 to msub, whereas the
; SelectionDAG sequence reuses the extended copies.
47define i16 @si16(i16 %a, i16 %b) {
48; CHECK-SD-LABEL: si16:
49; CHECK-SD:       // %bb.0: // %entry
50; CHECK-SD-NEXT:    sxth w8, w1
51; CHECK-SD-NEXT:    sxth w9, w0
52; CHECK-SD-NEXT:    sdiv w10, w9, w8
53; CHECK-SD-NEXT:    msub w0, w10, w8, w9
54; CHECK-SD-NEXT:    ret
55;
56; CHECK-GI-LABEL: si16:
57; CHECK-GI:       // %bb.0: // %entry
58; CHECK-GI-NEXT:    sxth w8, w0
59; CHECK-GI-NEXT:    sxth w9, w1
60; CHECK-GI-NEXT:    sdiv w8, w8, w9
61; CHECK-GI-NEXT:    msub w0, w8, w1, w0
62; CHECK-GI-NEXT:    ret
63entry:
64  %s = srem i16 %a, %b
65  ret i16 %s
66}
67
; Scalar unsigned remainder of two i16 values.
; Expected lowering for both instruction selectors: zero-extend the operands
; by masking with #0xffff, divide with udiv, then form a - (a / b) * b with
; msub. The GlobalISel sequence passes the unmasked w1/w0 to msub, whereas the
; SelectionDAG sequence reuses the masked copies.
68define i16 @ui16(i16 %a, i16 %b) {
69; CHECK-SD-LABEL: ui16:
70; CHECK-SD:       // %bb.0: // %entry
71; CHECK-SD-NEXT:    and w8, w1, #0xffff
72; CHECK-SD-NEXT:    and w9, w0, #0xffff
73; CHECK-SD-NEXT:    udiv w10, w9, w8
74; CHECK-SD-NEXT:    msub w0, w10, w8, w9
75; CHECK-SD-NEXT:    ret
76;
77; CHECK-GI-LABEL: ui16:
78; CHECK-GI:       // %bb.0: // %entry
79; CHECK-GI-NEXT:    and w8, w0, #0xffff
80; CHECK-GI-NEXT:    and w9, w1, #0xffff
81; CHECK-GI-NEXT:    udiv w8, w8, w9
82; CHECK-GI-NEXT:    msub w0, w8, w1, w0
83; CHECK-GI-NEXT:    ret
84entry:
85  %s = urem i16 %a, %b
86  ret i16 %s
87}
88
; Scalar signed remainder of two i32 values.
; Both instruction selectors share one expected sequence: sdiv on w0/w1
; followed by msub (a - (a / b) * b), with no extension step.
89define i32 @si32(i32 %a, i32 %b) {
90; CHECK-LABEL: si32:
91; CHECK:       // %bb.0: // %entry
92; CHECK-NEXT:    sdiv w8, w0, w1
93; CHECK-NEXT:    msub w0, w8, w1, w0
94; CHECK-NEXT:    ret
95entry:
96  %s = srem i32 %a, %b
97  ret i32 %s
98}
99
; Scalar unsigned remainder of two i32 values.
; Both instruction selectors share one expected sequence: udiv on w0/w1
; followed by msub (a - (a / b) * b), with no extension step.
100define i32 @ui32(i32 %a, i32 %b) {
101; CHECK-LABEL: ui32:
102; CHECK:       // %bb.0: // %entry
103; CHECK-NEXT:    udiv w8, w0, w1
104; CHECK-NEXT:    msub w0, w8, w1, w0
105; CHECK-NEXT:    ret
106entry:
107  %s = urem i32 %a, %b
108  ret i32 %s
109}
110
; Scalar signed remainder of two i64 values.
; Both instruction selectors share one expected sequence: 64-bit sdiv on x0/x1
; followed by msub (a - (a / b) * b).
111define i64 @si64(i64 %a, i64 %b) {
112; CHECK-LABEL: si64:
113; CHECK:       // %bb.0: // %entry
114; CHECK-NEXT:    sdiv x8, x0, x1
115; CHECK-NEXT:    msub x0, x8, x1, x0
116; CHECK-NEXT:    ret
117entry:
118  %s = srem i64 %a, %b
119  ret i64 %s
120}
121
; Scalar unsigned remainder of two i64 values.
; Both instruction selectors share one expected sequence: 64-bit udiv on x0/x1
; followed by msub (a - (a / b) * b).
122define i64 @ui64(i64 %a, i64 %b) {
123; CHECK-LABEL: ui64:
124; CHECK:       // %bb.0: // %entry
125; CHECK-NEXT:    udiv x8, x0, x1
126; CHECK-NEXT:    msub x0, x8, x1, x0
127; CHECK-NEXT:    ret
128entry:
129  %s = urem i64 %a, %b
130  ret i64 %s
131}
132
; Signed remainder of two i128 values.
; Both instruction selectors are expected to emit a call to __modti3 instead
; of inline division, spilling x30 before the call and reloading it afterwards.
133define i128 @si128(i128 %a, i128 %b) {
134; CHECK-LABEL: si128:
135; CHECK:       // %bb.0: // %entry
136; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
137; CHECK-NEXT:    .cfi_def_cfa_offset 16
138; CHECK-NEXT:    .cfi_offset w30, -16
139; CHECK-NEXT:    bl __modti3
140; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
141; CHECK-NEXT:    ret
142entry:
143  %s = srem i128 %a, %b
144  ret i128 %s
145}
146
; Unsigned remainder of two i128 values.
; Both instruction selectors are expected to emit a call to __umodti3 instead
; of inline division, spilling x30 before the call and reloading it afterwards.
147define i128 @ui128(i128 %a, i128 %b) {
148; CHECK-LABEL: ui128:
149; CHECK:       // %bb.0: // %entry
150; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
151; CHECK-NEXT:    .cfi_def_cfa_offset 16
152; CHECK-NEXT:    .cfi_offset w30, -16
153; CHECK-NEXT:    bl __umodti3
154; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
155; CHECK-NEXT:    ret
156entry:
157  %s = urem i128 %a, %b
158  ret i128 %s
159}
160
; Signed remainder of two <2 x i8> vectors.
; Both expected sequences sign-extend the lanes in place (shl/sshr by 24 on
; 32-bit lanes) and divide lane by lane with scalar sdiv.
; They differ in the multiply-subtract step: the SelectionDAG sequence issues a
; scalar msub per lane and rebuilds v0, while the GlobalISel sequence gathers
; the quotients in v2 and finishes with a single vector mls.
161define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
162; CHECK-SD-LABEL: sv2i8:
163; CHECK-SD:       // %bb.0: // %entry
164; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
165; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #24
166; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
167; CHECK-SD-NEXT:    sshr v1.2s, v1.2s, #24
168; CHECK-SD-NEXT:    fmov w8, s1
169; CHECK-SD-NEXT:    fmov w9, s0
170; CHECK-SD-NEXT:    mov w11, v1.s[1]
171; CHECK-SD-NEXT:    mov w12, v0.s[1]
172; CHECK-SD-NEXT:    sdiv w10, w9, w8
173; CHECK-SD-NEXT:    sdiv w13, w12, w11
174; CHECK-SD-NEXT:    msub w8, w10, w8, w9
175; CHECK-SD-NEXT:    fmov s0, w8
176; CHECK-SD-NEXT:    msub w9, w13, w11, w12
177; CHECK-SD-NEXT:    mov v0.s[1], w9
178; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
179; CHECK-SD-NEXT:    ret
180;
181; CHECK-GI-LABEL: sv2i8:
182; CHECK-GI:       // %bb.0: // %entry
183; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #24
184; CHECK-GI-NEXT:    shl v1.2s, v1.2s, #24
185; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #24
186; CHECK-GI-NEXT:    sshr v1.2s, v1.2s, #24
187; CHECK-GI-NEXT:    fmov w8, s0
188; CHECK-GI-NEXT:    fmov w9, s1
189; CHECK-GI-NEXT:    mov w10, v1.s[1]
190; CHECK-GI-NEXT:    sdiv w8, w8, w9
191; CHECK-GI-NEXT:    mov w9, v0.s[1]
192; CHECK-GI-NEXT:    sdiv w9, w9, w10
193; CHECK-GI-NEXT:    mov v2.s[0], w8
194; CHECK-GI-NEXT:    mov v2.s[1], w9
195; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
196; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
197; CHECK-GI-NEXT:    ret
198entry:
199  %s = srem <2 x i8> %d, %e
200  ret <2 x i8> %s
201}
202
; Signed remainder of two <3 x i8> vectors.
; The expected code for both instruction selectors works on w0-w5 directly:
; each lane is sign-extended with sxtb, divided with sdiv and completed with
; msub, and the three results are written to w0, w1 and w2.
; The two sequences differ only in which operand is extended first.
203define <3 x i8> @sv3i8(<3 x i8> %d, <3 x i8> %e) {
204; CHECK-SD-LABEL: sv3i8:
205; CHECK-SD:       // %bb.0: // %entry
206; CHECK-SD-NEXT:    sxtb w8, w3
207; CHECK-SD-NEXT:    sxtb w9, w0
208; CHECK-SD-NEXT:    sxtb w11, w4
209; CHECK-SD-NEXT:    sxtb w12, w1
210; CHECK-SD-NEXT:    sxtb w14, w5
211; CHECK-SD-NEXT:    sxtb w15, w2
212; CHECK-SD-NEXT:    sdiv w10, w9, w8
213; CHECK-SD-NEXT:    sdiv w13, w12, w11
214; CHECK-SD-NEXT:    msub w0, w10, w8, w9
215; CHECK-SD-NEXT:    sdiv w16, w15, w14
216; CHECK-SD-NEXT:    msub w1, w13, w11, w12
217; CHECK-SD-NEXT:    msub w2, w16, w14, w15
218; CHECK-SD-NEXT:    ret
219;
220; CHECK-GI-LABEL: sv3i8:
221; CHECK-GI:       // %bb.0: // %entry
222; CHECK-GI-NEXT:    sxtb w8, w0
223; CHECK-GI-NEXT:    sxtb w9, w3
224; CHECK-GI-NEXT:    sxtb w11, w1
225; CHECK-GI-NEXT:    sxtb w12, w4
226; CHECK-GI-NEXT:    sxtb w14, w2
227; CHECK-GI-NEXT:    sxtb w15, w5
228; CHECK-GI-NEXT:    sdiv w10, w8, w9
229; CHECK-GI-NEXT:    sdiv w13, w11, w12
230; CHECK-GI-NEXT:    msub w0, w10, w9, w8
231; CHECK-GI-NEXT:    sdiv w16, w14, w15
232; CHECK-GI-NEXT:    msub w1, w13, w12, w11
233; CHECK-GI-NEXT:    msub w2, w16, w15, w14
234; CHECK-GI-NEXT:    ret
235entry:
236  %s = srem <3 x i8> %d, %e
237  ret <3 x i8> %s
238}
239
; Signed remainder of two <4 x i8> vectors.
; SelectionDAG expectation: sign-extend within 16-bit lanes (shl/sshr by 8),
; extract each lane with smov, run scalar sdiv + msub per lane and insert the
; results back into v0.h[0..3].
; GlobalISel expectation: widen to 32-bit lanes with ushll, sign-extend with
; shl/sshr by 24, run scalar sdiv per lane, gather the quotients in v2, apply
; one vector mls and narrow the result with xtn.
240define <4 x i8> @sv4i8(<4 x i8> %d, <4 x i8> %e) {
241; CHECK-SD-LABEL: sv4i8:
242; CHECK-SD:       // %bb.0: // %entry
243; CHECK-SD-NEXT:    shl v0.4h, v0.4h, #8
244; CHECK-SD-NEXT:    shl v1.4h, v1.4h, #8
245; CHECK-SD-NEXT:    sshr v0.4h, v0.4h, #8
246; CHECK-SD-NEXT:    sshr v1.4h, v1.4h, #8
247; CHECK-SD-NEXT:    smov w11, v1.h[0]
248; CHECK-SD-NEXT:    smov w12, v0.h[0]
249; CHECK-SD-NEXT:    smov w8, v1.h[1]
250; CHECK-SD-NEXT:    smov w9, v0.h[1]
251; CHECK-SD-NEXT:    smov w14, v1.h[2]
252; CHECK-SD-NEXT:    smov w15, v0.h[2]
253; CHECK-SD-NEXT:    smov w17, v1.h[3]
254; CHECK-SD-NEXT:    smov w18, v0.h[3]
255; CHECK-SD-NEXT:    sdiv w13, w12, w11
256; CHECK-SD-NEXT:    sdiv w10, w9, w8
257; CHECK-SD-NEXT:    msub w11, w13, w11, w12
258; CHECK-SD-NEXT:    fmov s0, w11
259; CHECK-SD-NEXT:    sdiv w16, w15, w14
260; CHECK-SD-NEXT:    msub w8, w10, w8, w9
261; CHECK-SD-NEXT:    mov v0.h[1], w8
262; CHECK-SD-NEXT:    sdiv w9, w18, w17
263; CHECK-SD-NEXT:    msub w8, w16, w14, w15
264; CHECK-SD-NEXT:    mov v0.h[2], w8
265; CHECK-SD-NEXT:    msub w8, w9, w17, w18
266; CHECK-SD-NEXT:    mov v0.h[3], w8
267; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
268; CHECK-SD-NEXT:    ret
269;
270; CHECK-GI-LABEL: sv4i8:
271; CHECK-GI:       // %bb.0: // %entry
272; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
273; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
274; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #24
275; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #24
276; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #24
277; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #24
278; CHECK-GI-NEXT:    fmov w8, s0
279; CHECK-GI-NEXT:    fmov w9, s1
280; CHECK-GI-NEXT:    mov w10, v1.s[1]
281; CHECK-GI-NEXT:    mov w11, v1.s[2]
282; CHECK-GI-NEXT:    mov w12, v1.s[3]
283; CHECK-GI-NEXT:    sdiv w8, w8, w9
284; CHECK-GI-NEXT:    mov w9, v0.s[1]
285; CHECK-GI-NEXT:    sdiv w9, w9, w10
286; CHECK-GI-NEXT:    mov w10, v0.s[2]
287; CHECK-GI-NEXT:    mov v2.s[0], w8
288; CHECK-GI-NEXT:    sdiv w10, w10, w11
289; CHECK-GI-NEXT:    mov w11, v0.s[3]
290; CHECK-GI-NEXT:    mov v2.s[1], w9
291; CHECK-GI-NEXT:    sdiv w8, w11, w12
292; CHECK-GI-NEXT:    mov v2.s[2], w10
293; CHECK-GI-NEXT:    mov v2.s[3], w8
294; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
295; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
296; CHECK-GI-NEXT:    ret
297entry:
298  %s = srem <4 x i8> %d, %e
299  ret <4 x i8> %s
300}
301
; Signed remainder of two <8 x i8> vectors.
; SelectionDAG expectation: extract every byte lane with smov (sign-extending),
; run scalar sdiv + msub per lane, build the result byte by byte in v2 and copy
; it to d0 at the end.
; GlobalISel expectation: widen both inputs with sshll/sshll2 into two groups of
; four 32-bit lanes, run scalar sdiv per lane, gather the quotients in v4 and
; v5, apply one vector mls per group, then repack with uzp1 and xtn.
302define <8 x i8> @sv8i8(<8 x i8> %d, <8 x i8> %e) {
303; CHECK-SD-LABEL: sv8i8:
304; CHECK-SD:       // %bb.0: // %entry
305; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
306; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
307; CHECK-SD-NEXT:    smov w11, v1.b[0]
308; CHECK-SD-NEXT:    smov w12, v0.b[0]
309; CHECK-SD-NEXT:    smov w8, v1.b[1]
310; CHECK-SD-NEXT:    smov w9, v0.b[1]
311; CHECK-SD-NEXT:    smov w14, v1.b[2]
312; CHECK-SD-NEXT:    smov w15, v0.b[2]
313; CHECK-SD-NEXT:    smov w17, v1.b[3]
314; CHECK-SD-NEXT:    smov w18, v0.b[3]
315; CHECK-SD-NEXT:    smov w1, v1.b[4]
316; CHECK-SD-NEXT:    smov w2, v0.b[4]
317; CHECK-SD-NEXT:    smov w4, v1.b[5]
318; CHECK-SD-NEXT:    smov w5, v0.b[5]
319; CHECK-SD-NEXT:    sdiv w13, w12, w11
320; CHECK-SD-NEXT:    sdiv w10, w9, w8
321; CHECK-SD-NEXT:    msub w11, w13, w11, w12
322; CHECK-SD-NEXT:    smov w13, v1.b[7]
323; CHECK-SD-NEXT:    fmov s2, w11
324; CHECK-SD-NEXT:    smov w11, v0.b[6]
325; CHECK-SD-NEXT:    sdiv w16, w15, w14
326; CHECK-SD-NEXT:    msub w8, w10, w8, w9
327; CHECK-SD-NEXT:    smov w10, v1.b[6]
328; CHECK-SD-NEXT:    mov v2.b[1], w8
329; CHECK-SD-NEXT:    sdiv w0, w18, w17
330; CHECK-SD-NEXT:    msub w8, w16, w14, w15
331; CHECK-SD-NEXT:    smov w14, v0.b[7]
332; CHECK-SD-NEXT:    mov v2.b[2], w8
333; CHECK-SD-NEXT:    sdiv w3, w2, w1
334; CHECK-SD-NEXT:    msub w8, w0, w17, w18
335; CHECK-SD-NEXT:    mov v2.b[3], w8
336; CHECK-SD-NEXT:    sdiv w9, w5, w4
337; CHECK-SD-NEXT:    msub w8, w3, w1, w2
338; CHECK-SD-NEXT:    mov v2.b[4], w8
339; CHECK-SD-NEXT:    sdiv w12, w11, w10
340; CHECK-SD-NEXT:    msub w8, w9, w4, w5
341; CHECK-SD-NEXT:    mov v2.b[5], w8
342; CHECK-SD-NEXT:    sdiv w9, w14, w13
343; CHECK-SD-NEXT:    msub w8, w12, w10, w11
344; CHECK-SD-NEXT:    mov v2.b[6], w8
345; CHECK-SD-NEXT:    msub w8, w9, w13, w14
346; CHECK-SD-NEXT:    mov v2.b[7], w8
347; CHECK-SD-NEXT:    fmov d0, d2
348; CHECK-SD-NEXT:    ret
349;
350; CHECK-GI-LABEL: sv8i8:
351; CHECK-GI:       // %bb.0: // %entry
352; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
353; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
354; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
355; CHECK-GI-NEXT:    sshll v3.4s, v1.4h, #0
356; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
357; CHECK-GI-NEXT:    sshll2 v1.4s, v1.8h, #0
358; CHECK-GI-NEXT:    fmov w8, s2
359; CHECK-GI-NEXT:    fmov w9, s3
360; CHECK-GI-NEXT:    mov w10, v3.s[1]
361; CHECK-GI-NEXT:    mov w11, v3.s[2]
362; CHECK-GI-NEXT:    mov w12, v3.s[3]
363; CHECK-GI-NEXT:    fmov w13, s1
364; CHECK-GI-NEXT:    mov w14, v1.s[1]
365; CHECK-GI-NEXT:    mov w15, v1.s[2]
366; CHECK-GI-NEXT:    sdiv w8, w8, w9
367; CHECK-GI-NEXT:    mov w9, v2.s[1]
368; CHECK-GI-NEXT:    sdiv w9, w9, w10
369; CHECK-GI-NEXT:    mov w10, v2.s[2]
370; CHECK-GI-NEXT:    mov v4.s[0], w8
371; CHECK-GI-NEXT:    mov w8, v0.s[3]
372; CHECK-GI-NEXT:    sdiv w10, w10, w11
373; CHECK-GI-NEXT:    mov w11, v2.s[3]
374; CHECK-GI-NEXT:    mov v4.s[1], w9
375; CHECK-GI-NEXT:    sdiv w11, w11, w12
376; CHECK-GI-NEXT:    fmov w12, s0
377; CHECK-GI-NEXT:    mov v4.s[2], w10
378; CHECK-GI-NEXT:    sdiv w12, w12, w13
379; CHECK-GI-NEXT:    mov w13, v0.s[1]
380; CHECK-GI-NEXT:    mov v4.s[3], w11
381; CHECK-GI-NEXT:    mls v2.4s, v4.4s, v3.4s
382; CHECK-GI-NEXT:    sdiv w13, w13, w14
383; CHECK-GI-NEXT:    mov w14, v0.s[2]
384; CHECK-GI-NEXT:    mov v5.s[0], w12
385; CHECK-GI-NEXT:    mov w12, v1.s[3]
386; CHECK-GI-NEXT:    sdiv w14, w14, w15
387; CHECK-GI-NEXT:    mov v5.s[1], w13
388; CHECK-GI-NEXT:    sdiv w8, w8, w12
389; CHECK-GI-NEXT:    mov v5.s[2], w14
390; CHECK-GI-NEXT:    mov v5.s[3], w8
391; CHECK-GI-NEXT:    mls v0.4s, v5.4s, v1.4s
392; CHECK-GI-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
393; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
394; CHECK-GI-NEXT:    ret
395entry:
396  %s = srem <8 x i8> %d, %e
397  ret <8 x i8> %s
398}
399
; Signed remainder of two <16 x i8> vectors.
; SelectionDAG expectation: extract every byte lane with smov (sign-extending),
; run scalar sdiv + msub per lane and build the result byte by byte in v2
; before moving it to v0. The sequence uses w19-w28 as scratch, so x19-x28 are
; spilled in an 80-byte frame on entry and reloaded before returning.
; GlobalISel expectation: widen both inputs with sshll/sshll2 into four groups
; of four 32-bit lanes, run scalar sdiv per lane, gather the quotients in
; v16-v19, apply one vector mls per group and repack with three uzp1
; instructions. No stack frame is expected on this path.
400define <16 x i8> @sv16i8(<16 x i8> %d, <16 x i8> %e) {
401; CHECK-SD-LABEL: sv16i8:
402; CHECK-SD:       // %bb.0: // %entry
403; CHECK-SD-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
404; CHECK-SD-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
405; CHECK-SD-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
406; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
407; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
408; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
409; CHECK-SD-NEXT:    .cfi_offset w19, -8
410; CHECK-SD-NEXT:    .cfi_offset w20, -16
411; CHECK-SD-NEXT:    .cfi_offset w21, -24
412; CHECK-SD-NEXT:    .cfi_offset w22, -32
413; CHECK-SD-NEXT:    .cfi_offset w23, -40
414; CHECK-SD-NEXT:    .cfi_offset w24, -48
415; CHECK-SD-NEXT:    .cfi_offset w25, -56
416; CHECK-SD-NEXT:    .cfi_offset w26, -64
417; CHECK-SD-NEXT:    .cfi_offset w27, -72
418; CHECK-SD-NEXT:    .cfi_offset w28, -80
419; CHECK-SD-NEXT:    smov w11, v1.b[0]
420; CHECK-SD-NEXT:    smov w12, v0.b[0]
421; CHECK-SD-NEXT:    smov w8, v1.b[1]
422; CHECK-SD-NEXT:    smov w9, v0.b[1]
423; CHECK-SD-NEXT:    smov w14, v1.b[2]
424; CHECK-SD-NEXT:    smov w15, v0.b[2]
425; CHECK-SD-NEXT:    smov w17, v1.b[3]
426; CHECK-SD-NEXT:    smov w18, v0.b[3]
427; CHECK-SD-NEXT:    smov w1, v1.b[4]
428; CHECK-SD-NEXT:    smov w2, v0.b[4]
429; CHECK-SD-NEXT:    smov w4, v1.b[5]
430; CHECK-SD-NEXT:    smov w5, v0.b[5]
431; CHECK-SD-NEXT:    sdiv w13, w12, w11
432; CHECK-SD-NEXT:    smov w7, v1.b[6]
433; CHECK-SD-NEXT:    smov w19, v0.b[6]
434; CHECK-SD-NEXT:    smov w21, v1.b[7]
435; CHECK-SD-NEXT:    smov w22, v0.b[7]
436; CHECK-SD-NEXT:    smov w24, v1.b[8]
437; CHECK-SD-NEXT:    smov w25, v0.b[8]
438; CHECK-SD-NEXT:    smov w27, v1.b[9]
439; CHECK-SD-NEXT:    smov w28, v0.b[9]
440; CHECK-SD-NEXT:    sdiv w10, w9, w8
441; CHECK-SD-NEXT:    msub w11, w13, w11, w12
442; CHECK-SD-NEXT:    smov w13, v1.b[11]
443; CHECK-SD-NEXT:    fmov s2, w11
444; CHECK-SD-NEXT:    smov w11, v0.b[10]
445; CHECK-SD-NEXT:    sdiv w16, w15, w14
446; CHECK-SD-NEXT:    msub w8, w10, w8, w9
447; CHECK-SD-NEXT:    smov w10, v1.b[10]
448; CHECK-SD-NEXT:    mov v2.b[1], w8
449; CHECK-SD-NEXT:    sdiv w0, w18, w17
450; CHECK-SD-NEXT:    msub w8, w16, w14, w15
451; CHECK-SD-NEXT:    smov w14, v0.b[11]
452; CHECK-SD-NEXT:    smov w16, v1.b[12]
453; CHECK-SD-NEXT:    mov v2.b[2], w8
454; CHECK-SD-NEXT:    sdiv w3, w2, w1
455; CHECK-SD-NEXT:    msub w8, w0, w17, w18
456; CHECK-SD-NEXT:    smov w17, v0.b[12]
457; CHECK-SD-NEXT:    smov w0, v1.b[13]
458; CHECK-SD-NEXT:    mov v2.b[3], w8
459; CHECK-SD-NEXT:    sdiv w6, w5, w4
460; CHECK-SD-NEXT:    msub w8, w3, w1, w2
461; CHECK-SD-NEXT:    smov w1, v0.b[13]
462; CHECK-SD-NEXT:    mov v2.b[4], w8
463; CHECK-SD-NEXT:    sdiv w20, w19, w7
464; CHECK-SD-NEXT:    msub w8, w6, w4, w5
465; CHECK-SD-NEXT:    mov v2.b[5], w8
466; CHECK-SD-NEXT:    sdiv w23, w22, w21
467; CHECK-SD-NEXT:    msub w8, w20, w7, w19
468; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
469; CHECK-SD-NEXT:    mov v2.b[6], w8
470; CHECK-SD-NEXT:    sdiv w26, w25, w24
471; CHECK-SD-NEXT:    msub w8, w23, w21, w22
472; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
473; CHECK-SD-NEXT:    mov v2.b[7], w8
474; CHECK-SD-NEXT:    sdiv w9, w28, w27
475; CHECK-SD-NEXT:    msub w8, w26, w24, w25
476; CHECK-SD-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
477; CHECK-SD-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
478; CHECK-SD-NEXT:    mov v2.b[8], w8
479; CHECK-SD-NEXT:    sdiv w12, w11, w10
480; CHECK-SD-NEXT:    msub w8, w9, w27, w28
481; CHECK-SD-NEXT:    mov v2.b[9], w8
482; CHECK-SD-NEXT:    sdiv w15, w14, w13
483; CHECK-SD-NEXT:    msub w8, w12, w10, w11
484; CHECK-SD-NEXT:    smov w10, v1.b[14]
485; CHECK-SD-NEXT:    smov w11, v0.b[14]
486; CHECK-SD-NEXT:    mov v2.b[10], w8
487; CHECK-SD-NEXT:    sdiv w18, w17, w16
488; CHECK-SD-NEXT:    msub w8, w15, w13, w14
489; CHECK-SD-NEXT:    smov w13, v1.b[15]
490; CHECK-SD-NEXT:    smov w14, v0.b[15]
491; CHECK-SD-NEXT:    mov v2.b[11], w8
492; CHECK-SD-NEXT:    sdiv w9, w1, w0
493; CHECK-SD-NEXT:    msub w8, w18, w16, w17
494; CHECK-SD-NEXT:    mov v2.b[12], w8
495; CHECK-SD-NEXT:    sdiv w12, w11, w10
496; CHECK-SD-NEXT:    msub w8, w9, w0, w1
497; CHECK-SD-NEXT:    mov v2.b[13], w8
498; CHECK-SD-NEXT:    sdiv w9, w14, w13
499; CHECK-SD-NEXT:    msub w8, w12, w10, w11
500; CHECK-SD-NEXT:    mov v2.b[14], w8
501; CHECK-SD-NEXT:    msub w8, w9, w13, w14
502; CHECK-SD-NEXT:    mov v2.b[15], w8
503; CHECK-SD-NEXT:    mov v0.16b, v2.16b
504; CHECK-SD-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
505; CHECK-SD-NEXT:    ret
506;
507; CHECK-GI-LABEL: sv16i8:
508; CHECK-GI:       // %bb.0: // %entry
509; CHECK-GI-NEXT:    sshll v4.8h, v0.8b, #0
510; CHECK-GI-NEXT:    sshll v5.8h, v1.8b, #0
511; CHECK-GI-NEXT:    sshll2 v6.8h, v0.16b, #0
512; CHECK-GI-NEXT:    sshll2 v7.8h, v1.16b, #0
513; CHECK-GI-NEXT:    sshll v2.4s, v4.4h, #0
514; CHECK-GI-NEXT:    sshll v3.4s, v5.4h, #0
515; CHECK-GI-NEXT:    sshll2 v4.4s, v4.8h, #0
516; CHECK-GI-NEXT:    sshll2 v5.4s, v5.8h, #0
517; CHECK-GI-NEXT:    sshll v0.4s, v6.4h, #0
518; CHECK-GI-NEXT:    sshll v1.4s, v7.4h, #0
519; CHECK-GI-NEXT:    sshll2 v6.4s, v6.8h, #0
520; CHECK-GI-NEXT:    sshll2 v7.4s, v7.8h, #0
521; CHECK-GI-NEXT:    fmov w8, s2
522; CHECK-GI-NEXT:    fmov w9, s3
523; CHECK-GI-NEXT:    mov w12, v3.s[3]
524; CHECK-GI-NEXT:    fmov w13, s5
525; CHECK-GI-NEXT:    mov w16, v5.s[3]
526; CHECK-GI-NEXT:    fmov w17, s1
527; CHECK-GI-NEXT:    mov w18, v1.s[1]
528; CHECK-GI-NEXT:    mov w0, v1.s[2]
529; CHECK-GI-NEXT:    mov w1, v1.s[3]
530; CHECK-GI-NEXT:    sdiv w11, w8, w9
531; CHECK-GI-NEXT:    mov w8, v2.s[1]
532; CHECK-GI-NEXT:    mov w9, v3.s[1]
533; CHECK-GI-NEXT:    fmov w2, s7
534; CHECK-GI-NEXT:    mov w3, v7.s[1]
535; CHECK-GI-NEXT:    mov w4, v7.s[2]
536; CHECK-GI-NEXT:    sdiv w10, w8, w9
537; CHECK-GI-NEXT:    mov w8, v2.s[2]
538; CHECK-GI-NEXT:    mov w9, v3.s[2]
539; CHECK-GI-NEXT:    mov v16.s[0], w11
540; CHECK-GI-NEXT:    mov w11, v6.s[3]
541; CHECK-GI-NEXT:    sdiv w9, w8, w9
542; CHECK-GI-NEXT:    mov w8, v2.s[3]
543; CHECK-GI-NEXT:    mov v16.s[1], w10
544; CHECK-GI-NEXT:    sdiv w8, w8, w12
545; CHECK-GI-NEXT:    fmov w12, s4
546; CHECK-GI-NEXT:    mov v16.s[2], w9
547; CHECK-GI-NEXT:    sdiv w14, w12, w13
548; CHECK-GI-NEXT:    mov w12, v4.s[1]
549; CHECK-GI-NEXT:    mov w13, v5.s[1]
550; CHECK-GI-NEXT:    mov v16.s[3], w8
551; CHECK-GI-NEXT:    mls v2.4s, v16.4s, v3.4s
552; CHECK-GI-NEXT:    sdiv w15, w12, w13
553; CHECK-GI-NEXT:    mov w12, v4.s[2]
554; CHECK-GI-NEXT:    mov w13, v5.s[2]
555; CHECK-GI-NEXT:    mov v17.s[0], w14
556; CHECK-GI-NEXT:    mov w14, v7.s[3]
557; CHECK-GI-NEXT:    sdiv w13, w12, w13
558; CHECK-GI-NEXT:    mov w12, v4.s[3]
559; CHECK-GI-NEXT:    mov v17.s[1], w15
560; CHECK-GI-NEXT:    sdiv w12, w12, w16
561; CHECK-GI-NEXT:    fmov w16, s0
562; CHECK-GI-NEXT:    mov v17.s[2], w13
563; CHECK-GI-NEXT:    sdiv w16, w16, w17
564; CHECK-GI-NEXT:    mov w17, v0.s[1]
565; CHECK-GI-NEXT:    mov v17.s[3], w12
566; CHECK-GI-NEXT:    mls v4.4s, v17.4s, v5.4s
567; CHECK-GI-NEXT:    sdiv w17, w17, w18
568; CHECK-GI-NEXT:    mov w18, v0.s[2]
569; CHECK-GI-NEXT:    mov v18.s[0], w16
570; CHECK-GI-NEXT:    sdiv w18, w18, w0
571; CHECK-GI-NEXT:    mov w0, v0.s[3]
572; CHECK-GI-NEXT:    mov v18.s[1], w17
573; CHECK-GI-NEXT:    sdiv w0, w0, w1
574; CHECK-GI-NEXT:    fmov w1, s6
575; CHECK-GI-NEXT:    mov v18.s[2], w18
576; CHECK-GI-NEXT:    sdiv w1, w1, w2
577; CHECK-GI-NEXT:    mov w2, v6.s[1]
578; CHECK-GI-NEXT:    mov v18.s[3], w0
579; CHECK-GI-NEXT:    mls v0.4s, v18.4s, v1.4s
580; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
581; CHECK-GI-NEXT:    sdiv w2, w2, w3
582; CHECK-GI-NEXT:    mov w3, v6.s[2]
583; CHECK-GI-NEXT:    mov v19.s[0], w1
584; CHECK-GI-NEXT:    sdiv w3, w3, w4
585; CHECK-GI-NEXT:    mov v19.s[1], w2
586; CHECK-GI-NEXT:    sdiv w10, w11, w14
587; CHECK-GI-NEXT:    mov v19.s[2], w3
588; CHECK-GI-NEXT:    mov v19.s[3], w10
589; CHECK-GI-NEXT:    mls v6.4s, v19.4s, v7.4s
590; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v6.8h
591; CHECK-GI-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
592; CHECK-GI-NEXT:    ret
593entry:
594  %s = srem <16 x i8> %d, %e
595  ret <16 x i8> %s
596}
597
598define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) {
599; CHECK-SD-LABEL: sv32i8:
600; CHECK-SD:       // %bb.0: // %entry
601; CHECK-SD-NEXT:    sub sp, sp, #304
602; CHECK-SD-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
603; CHECK-SD-NEXT:    stp x28, x27, [sp, #224] // 16-byte Folded Spill
604; CHECK-SD-NEXT:    stp x26, x25, [sp, #240] // 16-byte Folded Spill
605; CHECK-SD-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
606; CHECK-SD-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
607; CHECK-SD-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
608; CHECK-SD-NEXT:    .cfi_def_cfa_offset 304
609; CHECK-SD-NEXT:    .cfi_offset w19, -8
610; CHECK-SD-NEXT:    .cfi_offset w20, -16
611; CHECK-SD-NEXT:    .cfi_offset w21, -24
612; CHECK-SD-NEXT:    .cfi_offset w22, -32
613; CHECK-SD-NEXT:    .cfi_offset w23, -40
614; CHECK-SD-NEXT:    .cfi_offset w24, -48
615; CHECK-SD-NEXT:    .cfi_offset w25, -56
616; CHECK-SD-NEXT:    .cfi_offset w26, -64
617; CHECK-SD-NEXT:    .cfi_offset w27, -72
618; CHECK-SD-NEXT:    .cfi_offset w28, -80
619; CHECK-SD-NEXT:    .cfi_offset w30, -88
620; CHECK-SD-NEXT:    .cfi_offset w29, -96
621; CHECK-SD-NEXT:    smov w8, v2.b[1]
622; CHECK-SD-NEXT:    smov w9, v0.b[1]
623; CHECK-SD-NEXT:    smov w19, v3.b[7]
624; CHECK-SD-NEXT:    smov w7, v1.b[7]
625; CHECK-SD-NEXT:    smov w6, v3.b[8]
626; CHECK-SD-NEXT:    smov w3, v1.b[8]
627; CHECK-SD-NEXT:    smov w13, v3.b[0]
628; CHECK-SD-NEXT:    smov w5, v3.b[1]
629; CHECK-SD-NEXT:    smov w0, v1.b[1]
630; CHECK-SD-NEXT:    smov w12, v3.b[2]
631; CHECK-SD-NEXT:    smov w17, v3.b[3]
632; CHECK-SD-NEXT:    smov w16, v1.b[3]
633; CHECK-SD-NEXT:    str w8, [sp, #80] // 4-byte Folded Spill
634; CHECK-SD-NEXT:    sdiv w10, w9, w8
635; CHECK-SD-NEXT:    smov w8, v2.b[0]
636; CHECK-SD-NEXT:    str w9, [sp, #88] // 4-byte Folded Spill
637; CHECK-SD-NEXT:    smov w9, v0.b[0]
638; CHECK-SD-NEXT:    ldr w30, [sp, #80] // 4-byte Folded Reload
639; CHECK-SD-NEXT:    smov w15, v3.b[4]
640; CHECK-SD-NEXT:    smov w14, v1.b[4]
641; CHECK-SD-NEXT:    smov w4, v3.b[5]
642; CHECK-SD-NEXT:    smov w1, v1.b[5]
643; CHECK-SD-NEXT:    smov w2, v3.b[6]
644; CHECK-SD-NEXT:    smov w18, v1.b[6]
645; CHECK-SD-NEXT:    str w8, [sp, #32] // 4-byte Folded Spill
646; CHECK-SD-NEXT:    smov w21, v3.b[9]
647; CHECK-SD-NEXT:    smov w20, v1.b[9]
648; CHECK-SD-NEXT:    str w9, [sp, #40] // 4-byte Folded Spill
649; CHECK-SD-NEXT:    ldr w29, [sp, #32] // 4-byte Folded Reload
650; CHECK-SD-NEXT:    sdiv w11, w9, w8
651; CHECK-SD-NEXT:    smov w8, v2.b[2]
652; CHECK-SD-NEXT:    smov w9, v0.b[2]
653; CHECK-SD-NEXT:    str w10, [sp, #96] // 4-byte Folded Spill
654; CHECK-SD-NEXT:    stp w8, w9, [sp, #16] // 8-byte Folded Spill
655; CHECK-SD-NEXT:    sdiv w10, w9, w8
656; CHECK-SD-NEXT:    smov w8, v2.b[3]
657; CHECK-SD-NEXT:    smov w9, v0.b[3]
658; CHECK-SD-NEXT:    stp w11, w8, [sp, #48] // 8-byte Folded Spill
659; CHECK-SD-NEXT:    str w10, [sp, #24] // 4-byte Folded Spill
660; CHECK-SD-NEXT:    sdiv w10, w9, w8
661; CHECK-SD-NEXT:    smov w8, v2.b[4]
662; CHECK-SD-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
663; CHECK-SD-NEXT:    stp w9, w10, [sp, #56] // 8-byte Folded Spill
664; CHECK-SD-NEXT:    smov w9, v0.b[4]
665; CHECK-SD-NEXT:    sdiv w27, w0, w5
666; CHECK-SD-NEXT:    str w9, [sp, #36] // 4-byte Folded Spill
667; CHECK-SD-NEXT:    sdiv w10, w9, w8
668; CHECK-SD-NEXT:    smov w8, v2.b[5]
669; CHECK-SD-NEXT:    smov w9, v0.b[5]
670; CHECK-SD-NEXT:    str w8, [sp, #76] // 4-byte Folded Spill
671; CHECK-SD-NEXT:    str w9, [sp, #84] // 4-byte Folded Spill
672; CHECK-SD-NEXT:    str w10, [sp, #44] // 4-byte Folded Spill
673; CHECK-SD-NEXT:    sdiv w10, w9, w8
674; CHECK-SD-NEXT:    smov w8, v2.b[6]
675; CHECK-SD-NEXT:    smov w9, v0.b[6]
676; CHECK-SD-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
677; CHECK-SD-NEXT:    str w10, [sp, #92] // 4-byte Folded Spill
678; CHECK-SD-NEXT:    sdiv w10, w9, w8
679; CHECK-SD-NEXT:    smov w8, v2.b[7]
680; CHECK-SD-NEXT:    smov w9, v0.b[7]
681; CHECK-SD-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
682; CHECK-SD-NEXT:    sdiv w11, w9, w8
683; CHECK-SD-NEXT:    smov w8, v2.b[8]
684; CHECK-SD-NEXT:    smov w9, v0.b[8]
685; CHECK-SD-NEXT:    str w10, [sp, #72] // 4-byte Folded Spill
686; CHECK-SD-NEXT:    stp w8, w9, [sp, #100] // 8-byte Folded Spill
687; CHECK-SD-NEXT:    sdiv w10, w9, w8
688; CHECK-SD-NEXT:    smov w8, v2.b[9]
689; CHECK-SD-NEXT:    smov w9, v0.b[9]
690; CHECK-SD-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
691; CHECK-SD-NEXT:    str w10, [sp, #108] // 4-byte Folded Spill
692; CHECK-SD-NEXT:    sdiv w10, w9, w8
693; CHECK-SD-NEXT:    smov w8, v2.b[10]
694; CHECK-SD-NEXT:    smov w9, v0.b[10]
695; CHECK-SD-NEXT:    stp w11, w8, [sp, #120] // 8-byte Folded Spill
696; CHECK-SD-NEXT:    str w10, [sp, #144] // 4-byte Folded Spill
697; CHECK-SD-NEXT:    sdiv w10, w9, w8
698; CHECK-SD-NEXT:    smov w8, v2.b[11]
699; CHECK-SD-NEXT:    stp w9, w10, [sp, #128] // 8-byte Folded Spill
700; CHECK-SD-NEXT:    smov w9, v0.b[11]
701; CHECK-SD-NEXT:    sdiv w25, w16, w17
702; CHECK-SD-NEXT:    stp w8, w9, [sp, #172] // 8-byte Folded Spill
703; CHECK-SD-NEXT:    sdiv w11, w9, w8
704; CHECK-SD-NEXT:    smov w8, v2.b[12]
705; CHECK-SD-NEXT:    smov w9, v0.b[12]
706; CHECK-SD-NEXT:    str w8, [sp, #152] // 4-byte Folded Spill
707; CHECK-SD-NEXT:    str w9, [sp, #160] // 4-byte Folded Spill
708; CHECK-SD-NEXT:    sdiv w10, w9, w8
709; CHECK-SD-NEXT:    smov w8, v2.b[13]
710; CHECK-SD-NEXT:    smov w9, v0.b[13]
711; CHECK-SD-NEXT:    stp w8, w9, [sp, #196] // 8-byte Folded Spill
712; CHECK-SD-NEXT:    str w10, [sp, #168] // 4-byte Folded Spill
713; CHECK-SD-NEXT:    sdiv w10, w9, w8
714; CHECK-SD-NEXT:    smov w8, v2.b[14]
715; CHECK-SD-NEXT:    smov w9, v0.b[14]
716; CHECK-SD-NEXT:    stp w11, w8, [sp, #180] // 8-byte Folded Spill
717; CHECK-SD-NEXT:    smov w11, v1.b[2]
718; CHECK-SD-NEXT:    str w10, [sp, #204] // 4-byte Folded Spill
719; CHECK-SD-NEXT:    sdiv w10, w9, w8
720; CHECK-SD-NEXT:    smov w8, v2.b[15]
721; CHECK-SD-NEXT:    str w8, [sp, #148] // 4-byte Folded Spill
722; CHECK-SD-NEXT:    stp w9, w10, [sp, #188] // 8-byte Folded Spill
723; CHECK-SD-NEXT:    smov w9, v0.b[15]
724; CHECK-SD-NEXT:    sdiv w22, w11, w12
725; CHECK-SD-NEXT:    str w9, [sp, #156] // 4-byte Folded Spill
726; CHECK-SD-NEXT:    sdiv w10, w9, w8
727; CHECK-SD-NEXT:    str w10, [sp, #164] // 4-byte Folded Spill
728; CHECK-SD-NEXT:    smov w10, v1.b[0]
729; CHECK-SD-NEXT:    sdiv w9, w7, w19
730; CHECK-SD-NEXT:    sdiv w8, w3, w6
731; CHECK-SD-NEXT:    sdiv w23, w10, w13
732; CHECK-SD-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
733; CHECK-SD-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
734; CHECK-SD-NEXT:    ldr w9, [sp, #88] // 4-byte Folded Reload
735; CHECK-SD-NEXT:    msub w9, w8, w30, w9
736; CHECK-SD-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
737; CHECK-SD-NEXT:    ldr w30, [sp, #40] // 4-byte Folded Reload
738; CHECK-SD-NEXT:    msub w8, w8, w29, w30
739; CHECK-SD-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
740; CHECK-SD-NEXT:    fmov s0, w8
741; CHECK-SD-NEXT:    msub w10, w23, w13, w10
742; CHECK-SD-NEXT:    sdiv w24, w14, w15
743; CHECK-SD-NEXT:    msub w13, w27, w5, w0
744; CHECK-SD-NEXT:    ldr w5, [sp, #16] // 4-byte Folded Reload
745; CHECK-SD-NEXT:    mov v0.b[1], w9
746; CHECK-SD-NEXT:    msub w9, w22, w12, w11
747; CHECK-SD-NEXT:    smov w11, v1.b[10]
748; CHECK-SD-NEXT:    fmov s2, w10
749; CHECK-SD-NEXT:    ldp w10, w8, [sp, #20] // 8-byte Folded Reload
750; CHECK-SD-NEXT:    mov v2.b[1], w13
751; CHECK-SD-NEXT:    msub w8, w8, w5, w10
752; CHECK-SD-NEXT:    ldr w5, [sp, #52] // 4-byte Folded Reload
753; CHECK-SD-NEXT:    smov w10, v3.b[10]
754; CHECK-SD-NEXT:    sdiv w28, w1, w4
755; CHECK-SD-NEXT:    ldp w13, w12, [sp, #56] // 8-byte Folded Reload
756; CHECK-SD-NEXT:    mov v2.b[2], w9
757; CHECK-SD-NEXT:    mov v0.b[2], w8
758; CHECK-SD-NEXT:    msub w8, w25, w17, w16
759; CHECK-SD-NEXT:    ldr w17, [sp, #28] // 4-byte Folded Reload
760; CHECK-SD-NEXT:    ldr w16, [sp, #36] // 4-byte Folded Reload
761; CHECK-SD-NEXT:    msub w12, w12, w5, w13
762; CHECK-SD-NEXT:    ldr w13, [sp, #44] // 4-byte Folded Reload
763; CHECK-SD-NEXT:    ldr w5, [sp, #136] // 4-byte Folded Reload
764; CHECK-SD-NEXT:    mov v2.b[3], w8
765; CHECK-SD-NEXT:    msub w8, w24, w15, w14
766; CHECK-SD-NEXT:    ldr w15, [sp, #92] // 4-byte Folded Reload
767; CHECK-SD-NEXT:    mov v0.b[3], w12
768; CHECK-SD-NEXT:    msub w13, w13, w17, w16
769; CHECK-SD-NEXT:    ldr w17, [sp, #76] // 4-byte Folded Reload
770; CHECK-SD-NEXT:    sdiv w26, w18, w2
771; CHECK-SD-NEXT:    ldr w16, [sp, #84] // 4-byte Folded Reload
772; CHECK-SD-NEXT:    smov w12, v3.b[11]
773; CHECK-SD-NEXT:    msub w15, w15, w17, w16
774; CHECK-SD-NEXT:    smov w14, v1.b[11]
775; CHECK-SD-NEXT:    mov v2.b[4], w8
776; CHECK-SD-NEXT:    msub w8, w28, w4, w1
777; CHECK-SD-NEXT:    ldr w1, [sp, #64] // 4-byte Folded Reload
778; CHECK-SD-NEXT:    mov v0.b[4], w13
779; CHECK-SD-NEXT:    ldr w4, [sp, #100] // 4-byte Folded Reload
780; CHECK-SD-NEXT:    ldp w17, w16, [sp, #68] // 8-byte Folded Reload
781; CHECK-SD-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
782; CHECK-SD-NEXT:    mov v2.b[5], w8
783; CHECK-SD-NEXT:    ldp x28, x27, [sp, #224] // 16-byte Folded Reload
784; CHECK-SD-NEXT:    mov v0.b[5], w15
785; CHECK-SD-NEXT:    msub w16, w16, w1, w17
786; CHECK-SD-NEXT:    smov w15, v3.b[12]
787; CHECK-SD-NEXT:    msub w8, w26, w2, w18
788; CHECK-SD-NEXT:    ldr w2, [sp, #112] // 4-byte Folded Reload
789; CHECK-SD-NEXT:    sdiv w0, w20, w21
790; CHECK-SD-NEXT:    ldp w1, w18, [sp, #116] // 8-byte Folded Reload
791; CHECK-SD-NEXT:    smov w17, v1.b[12]
792; CHECK-SD-NEXT:    ldp x26, x25, [sp, #240] // 16-byte Folded Reload
793; CHECK-SD-NEXT:    mov v2.b[6], w8
794; CHECK-SD-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
795; CHECK-SD-NEXT:    mov v0.b[6], w16
796; CHECK-SD-NEXT:    msub w18, w18, w2, w1
797; CHECK-SD-NEXT:    msub w8, w8, w19, w7
798; CHECK-SD-NEXT:    ldp w2, w1, [sp, #104] // 8-byte Folded Reload
799; CHECK-SD-NEXT:    mov v0.b[7], w18
800; CHECK-SD-NEXT:    smov w18, v3.b[13]
801; CHECK-SD-NEXT:    mov v2.b[7], w8
802; CHECK-SD-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
803; CHECK-SD-NEXT:    sdiv w9, w11, w10
804; CHECK-SD-NEXT:    msub w1, w1, w4, w2
805; CHECK-SD-NEXT:    smov w2, v1.b[13]
806; CHECK-SD-NEXT:    msub w8, w8, w6, w3
807; CHECK-SD-NEXT:    ldp w4, w3, [sp, #140] // 8-byte Folded Reload
808; CHECK-SD-NEXT:    mov v0.b[8], w1
809; CHECK-SD-NEXT:    mov v2.b[8], w8
810; CHECK-SD-NEXT:    msub w8, w0, w21, w20
811; CHECK-SD-NEXT:    msub w3, w3, w5, w4
812; CHECK-SD-NEXT:    ldr w5, [sp, #124] // 4-byte Folded Reload
813; CHECK-SD-NEXT:    ldp w4, w1, [sp, #128] // 8-byte Folded Reload
814; CHECK-SD-NEXT:    sdiv w13, w14, w12
815; CHECK-SD-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
816; CHECK-SD-NEXT:    mov v2.b[9], w8
817; CHECK-SD-NEXT:    mov v0.b[9], w3
818; CHECK-SD-NEXT:    msub w8, w9, w10, w11
819; CHECK-SD-NEXT:    msub w1, w1, w5, w4
820; CHECK-SD-NEXT:    ldr w4, [sp, #172] // 4-byte Folded Reload
821; CHECK-SD-NEXT:    smov w9, v3.b[14]
822; CHECK-SD-NEXT:    ldp w3, w11, [sp, #176] // 8-byte Folded Reload
823; CHECK-SD-NEXT:    smov w10, v1.b[14]
824; CHECK-SD-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
825; CHECK-SD-NEXT:    mov v2.b[10], w8
826; CHECK-SD-NEXT:    mov v0.b[10], w1
827; CHECK-SD-NEXT:    ldr w1, [sp, #152] // 4-byte Folded Reload
828; CHECK-SD-NEXT:    msub w11, w11, w4, w3
829; CHECK-SD-NEXT:    sdiv w16, w17, w15
830; CHECK-SD-NEXT:    msub w8, w13, w12, w14
831; CHECK-SD-NEXT:    ldr w13, [sp, #168] // 4-byte Folded Reload
832; CHECK-SD-NEXT:    ldr w14, [sp, #160] // 4-byte Folded Reload
833; CHECK-SD-NEXT:    mov v0.b[11], w11
834; CHECK-SD-NEXT:    smov w11, v3.b[15]
835; CHECK-SD-NEXT:    msub w13, w13, w1, w14
836; CHECK-SD-NEXT:    smov w14, v1.b[15]
837; CHECK-SD-NEXT:    mov v2.b[11], w8
838; CHECK-SD-NEXT:    mov v0.b[12], w13
839; CHECK-SD-NEXT:    sdiv w0, w2, w18
840; CHECK-SD-NEXT:    msub w8, w16, w15, w17
841; CHECK-SD-NEXT:    ldr w17, [sp, #196] // 4-byte Folded Reload
842; CHECK-SD-NEXT:    ldp w16, w15, [sp, #200] // 8-byte Folded Reload
843; CHECK-SD-NEXT:    mov v2.b[12], w8
844; CHECK-SD-NEXT:    msub w15, w15, w17, w16
845; CHECK-SD-NEXT:    ldp w17, w16, [sp, #188] // 8-byte Folded Reload
846; CHECK-SD-NEXT:    mov v0.b[13], w15
847; CHECK-SD-NEXT:    sdiv w12, w10, w9
848; CHECK-SD-NEXT:    msub w8, w0, w18, w2
849; CHECK-SD-NEXT:    ldr w18, [sp, #184] // 4-byte Folded Reload
850; CHECK-SD-NEXT:    msub w16, w16, w18, w17
851; CHECK-SD-NEXT:    mov v2.b[13], w8
852; CHECK-SD-NEXT:    mov v0.b[14], w16
853; CHECK-SD-NEXT:    sdiv w13, w14, w11
854; CHECK-SD-NEXT:    msub w8, w12, w9, w10
855; CHECK-SD-NEXT:    ldr w9, [sp, #164] // 4-byte Folded Reload
856; CHECK-SD-NEXT:    ldr w12, [sp, #148] // 4-byte Folded Reload
857; CHECK-SD-NEXT:    ldr w10, [sp, #156] // 4-byte Folded Reload
858; CHECK-SD-NEXT:    mov v2.b[14], w8
859; CHECK-SD-NEXT:    msub w9, w9, w12, w10
860; CHECK-SD-NEXT:    mov v0.b[15], w9
861; CHECK-SD-NEXT:    msub w8, w13, w11, w14
862; CHECK-SD-NEXT:    mov v2.b[15], w8
863; CHECK-SD-NEXT:    mov v1.16b, v2.16b
864; CHECK-SD-NEXT:    add sp, sp, #304
865; CHECK-SD-NEXT:    ret
866;
867; CHECK-GI-LABEL: sv32i8:
868; CHECK-GI:       // %bb.0: // %entry
869; CHECK-GI-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
870; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
871; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
872; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
873; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
874; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
875; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
876; CHECK-GI-NEXT:    .cfi_offset w19, -8
877; CHECK-GI-NEXT:    .cfi_offset w20, -16
878; CHECK-GI-NEXT:    .cfi_offset w21, -24
879; CHECK-GI-NEXT:    .cfi_offset w22, -32
880; CHECK-GI-NEXT:    .cfi_offset w23, -40
881; CHECK-GI-NEXT:    .cfi_offset w24, -48
882; CHECK-GI-NEXT:    .cfi_offset w25, -56
883; CHECK-GI-NEXT:    .cfi_offset w26, -64
884; CHECK-GI-NEXT:    .cfi_offset w27, -72
885; CHECK-GI-NEXT:    .cfi_offset w28, -80
886; CHECK-GI-NEXT:    .cfi_offset w30, -88
887; CHECK-GI-NEXT:    .cfi_offset w29, -96
888; CHECK-GI-NEXT:    sshll v4.8h, v0.8b, #0
889; CHECK-GI-NEXT:    sshll v5.8h, v2.8b, #0
890; CHECK-GI-NEXT:    sshll v16.8h, v1.8b, #0
891; CHECK-GI-NEXT:    sshll v17.8h, v3.8b, #0
892; CHECK-GI-NEXT:    sshll v6.4s, v4.4h, #0
893; CHECK-GI-NEXT:    sshll v7.4s, v5.4h, #0
894; CHECK-GI-NEXT:    sshll2 v4.4s, v4.8h, #0
895; CHECK-GI-NEXT:    sshll2 v5.4s, v5.8h, #0
896; CHECK-GI-NEXT:    sshll v18.4s, v16.4h, #0
897; CHECK-GI-NEXT:    sshll v19.4s, v17.4h, #0
898; CHECK-GI-NEXT:    sshll2 v16.4s, v16.8h, #0
899; CHECK-GI-NEXT:    sshll2 v17.4s, v17.8h, #0
900; CHECK-GI-NEXT:    fmov w8, s6
901; CHECK-GI-NEXT:    fmov w9, s7
902; CHECK-GI-NEXT:    mov w12, v7.s[3]
903; CHECK-GI-NEXT:    fmov w13, s5
904; CHECK-GI-NEXT:    mov w16, v5.s[3]
905; CHECK-GI-NEXT:    fmov w6, s19
906; CHECK-GI-NEXT:    mov w7, v19.s[3]
907; CHECK-GI-NEXT:    fmov w21, s17
908; CHECK-GI-NEXT:    mov w23, v17.s[3]
909; CHECK-GI-NEXT:    sdiv w11, w8, w9
910; CHECK-GI-NEXT:    mov w8, v6.s[1]
911; CHECK-GI-NEXT:    mov w9, v7.s[1]
912; CHECK-GI-NEXT:    sdiv w10, w8, w9
913; CHECK-GI-NEXT:    mov w8, v6.s[2]
914; CHECK-GI-NEXT:    mov w9, v7.s[2]
915; CHECK-GI-NEXT:    mov v20.s[0], w11
916; CHECK-GI-NEXT:    sdiv w9, w8, w9
917; CHECK-GI-NEXT:    mov w8, v6.s[3]
918; CHECK-GI-NEXT:    sshll2 v6.8h, v0.16b, #0
919; CHECK-GI-NEXT:    mov v20.s[1], w10
920; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
921; CHECK-GI-NEXT:    sshll v28.4s, v0.4h, #0
922; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
923; CHECK-GI-NEXT:    sdiv w8, w8, w12
924; CHECK-GI-NEXT:    fmov w12, s4
925; CHECK-GI-NEXT:    mov v20.s[2], w9
926; CHECK-GI-NEXT:    sdiv w15, w12, w13
927; CHECK-GI-NEXT:    mov w12, v4.s[1]
928; CHECK-GI-NEXT:    mov w13, v5.s[1]
929; CHECK-GI-NEXT:    mov v20.s[3], w8
930; CHECK-GI-NEXT:    sdiv w14, w12, w13
931; CHECK-GI-NEXT:    mov w12, v4.s[2]
932; CHECK-GI-NEXT:    mov w13, v5.s[2]
933; CHECK-GI-NEXT:    sshll v5.4s, v6.4h, #0
934; CHECK-GI-NEXT:    mov v21.s[0], w15
935; CHECK-GI-NEXT:    sdiv w13, w12, w13
936; CHECK-GI-NEXT:    mov w12, v4.s[3]
937; CHECK-GI-NEXT:    sshll2 v4.8h, v2.16b, #0
938; CHECK-GI-NEXT:    mov v21.s[1], w14
939; CHECK-GI-NEXT:    sshll v2.8h, v2.8b, #0
940; CHECK-GI-NEXT:    sshll v7.4s, v4.4h, #0
941; CHECK-GI-NEXT:    sshll v30.4s, v2.4h, #0
942; CHECK-GI-NEXT:    sshll2 v2.4s, v2.8h, #0
943; CHECK-GI-NEXT:    fmov w17, s7
944; CHECK-GI-NEXT:    mls v28.4s, v20.4s, v30.4s
945; CHECK-GI-NEXT:    sdiv w12, w12, w16
946; CHECK-GI-NEXT:    fmov w16, s5
947; CHECK-GI-NEXT:    mov v21.s[2], w13
948; CHECK-GI-NEXT:    sdiv w1, w16, w17
949; CHECK-GI-NEXT:    mov w16, v5.s[1]
950; CHECK-GI-NEXT:    mov w17, v7.s[1]
951; CHECK-GI-NEXT:    mov v21.s[3], w12
952; CHECK-GI-NEXT:    mls v0.4s, v21.4s, v2.4s
953; CHECK-GI-NEXT:    sdiv w0, w16, w17
954; CHECK-GI-NEXT:    mov w16, v5.s[2]
955; CHECK-GI-NEXT:    mov w17, v7.s[2]
956; CHECK-GI-NEXT:    mov v22.s[0], w1
957; CHECK-GI-NEXT:    uzp1 v0.8h, v28.8h, v0.8h
958; CHECK-GI-NEXT:    sdiv w18, w16, w17
959; CHECK-GI-NEXT:    mov w16, v5.s[3]
960; CHECK-GI-NEXT:    mov w17, v7.s[3]
961; CHECK-GI-NEXT:    sshll2 v5.4s, v6.8h, #0
962; CHECK-GI-NEXT:    sshll2 v7.4s, v4.8h, #0
963; CHECK-GI-NEXT:    mov v22.s[1], w0
964; CHECK-GI-NEXT:    sshll v6.4s, v6.4h, #0
965; CHECK-GI-NEXT:    sshll v4.4s, v4.4h, #0
966; CHECK-GI-NEXT:    fmov w2, s7
967; CHECK-GI-NEXT:    mov w4, v7.s[3]
968; CHECK-GI-NEXT:    sdiv w16, w16, w17
969; CHECK-GI-NEXT:    fmov w17, s5
970; CHECK-GI-NEXT:    mov v22.s[2], w18
971; CHECK-GI-NEXT:    sdiv w5, w17, w2
972; CHECK-GI-NEXT:    mov w17, v5.s[1]
973; CHECK-GI-NEXT:    mov w2, v7.s[1]
974; CHECK-GI-NEXT:    mov v22.s[3], w16
975; CHECK-GI-NEXT:    mls v6.4s, v22.4s, v4.4s
976; CHECK-GI-NEXT:    sdiv w3, w17, w2
977; CHECK-GI-NEXT:    mov w17, v5.s[2]
978; CHECK-GI-NEXT:    mov w2, v7.s[2]
979; CHECK-GI-NEXT:    mov v23.s[0], w5
980; CHECK-GI-NEXT:    sdiv w2, w17, w2
981; CHECK-GI-NEXT:    mov w17, v5.s[3]
982; CHECK-GI-NEXT:    mov v23.s[1], w3
983; CHECK-GI-NEXT:    sdiv w17, w17, w4
984; CHECK-GI-NEXT:    fmov w4, s18
985; CHECK-GI-NEXT:    mov v23.s[2], w2
986; CHECK-GI-NEXT:    sdiv w20, w4, w6
987; CHECK-GI-NEXT:    mov w4, v18.s[1]
988; CHECK-GI-NEXT:    mov w6, v19.s[1]
989; CHECK-GI-NEXT:    mov v23.s[3], w17
990; CHECK-GI-NEXT:    mls v5.4s, v23.4s, v7.4s
991; CHECK-GI-NEXT:    sdiv w19, w4, w6
992; CHECK-GI-NEXT:    mov w4, v18.s[2]
993; CHECK-GI-NEXT:    mov w6, v19.s[2]
994; CHECK-GI-NEXT:    mov v24.s[0], w20
995; CHECK-GI-NEXT:    uzp1 v2.8h, v6.8h, v5.8h
996; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
997; CHECK-GI-NEXT:    sdiv w6, w4, w6
998; CHECK-GI-NEXT:    mov w4, v18.s[3]
999; CHECK-GI-NEXT:    mov v24.s[1], w19
1000; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
1001; CHECK-GI-NEXT:    sdiv w4, w4, w7
1002; CHECK-GI-NEXT:    fmov w7, s16
1003; CHECK-GI-NEXT:    mov v24.s[2], w6
1004; CHECK-GI-NEXT:    sdiv w24, w7, w21
1005; CHECK-GI-NEXT:    mov w7, v16.s[1]
1006; CHECK-GI-NEXT:    mov w21, v17.s[1]
1007; CHECK-GI-NEXT:    mov v24.s[3], w4
1008; CHECK-GI-NEXT:    sdiv w22, w7, w21
1009; CHECK-GI-NEXT:    mov w7, v16.s[2]
1010; CHECK-GI-NEXT:    mov w21, v17.s[2]
1011; CHECK-GI-NEXT:    sshll2 v17.8h, v1.16b, #0
1012; CHECK-GI-NEXT:    mov v25.s[0], w24
1013; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
1014; CHECK-GI-NEXT:    sshll v18.4s, v17.4h, #0
1015; CHECK-GI-NEXT:    sshll v29.4s, v1.4h, #0
1016; CHECK-GI-NEXT:    sshll2 v1.4s, v1.8h, #0
1017; CHECK-GI-NEXT:    sdiv w21, w7, w21
1018; CHECK-GI-NEXT:    mov w7, v16.s[3]
1019; CHECK-GI-NEXT:    sshll2 v16.8h, v3.16b, #0
1020; CHECK-GI-NEXT:    mov v25.s[1], w22
1021; CHECK-GI-NEXT:    sshll v3.8h, v3.8b, #0
1022; CHECK-GI-NEXT:    sshll v19.4s, v16.4h, #0
1023; CHECK-GI-NEXT:    sshll v31.4s, v3.4h, #0
1024; CHECK-GI-NEXT:    sshll2 v3.4s, v3.8h, #0
1025; CHECK-GI-NEXT:    fmov w25, s19
1026; CHECK-GI-NEXT:    mov w26, v19.s[1]
1027; CHECK-GI-NEXT:    mov w27, v19.s[2]
1028; CHECK-GI-NEXT:    mov w28, v19.s[3]
1029; CHECK-GI-NEXT:    sshll2 v19.4s, v16.8h, #0
1030; CHECK-GI-NEXT:    sshll v16.4s, v16.4h, #0
1031; CHECK-GI-NEXT:    sdiv w7, w7, w23
1032; CHECK-GI-NEXT:    fmov w23, s18
1033; CHECK-GI-NEXT:    mov v25.s[2], w21
1034; CHECK-GI-NEXT:    mls v29.4s, v24.4s, v31.4s
1035; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
1036; CHECK-GI-NEXT:    fmov w29, s19
1037; CHECK-GI-NEXT:    mov w30, v19.s[1]
1038; CHECK-GI-NEXT:    mov w15, v19.s[2]
1039; CHECK-GI-NEXT:    sdiv w25, w23, w25
1040; CHECK-GI-NEXT:    mov w23, v18.s[1]
1041; CHECK-GI-NEXT:    mov v25.s[3], w7
1042; CHECK-GI-NEXT:    mls v1.4s, v25.4s, v3.4s
1043; CHECK-GI-NEXT:    sdiv w26, w23, w26
1044; CHECK-GI-NEXT:    mov w23, v18.s[2]
1045; CHECK-GI-NEXT:    mov v26.s[0], w25
1046; CHECK-GI-NEXT:    uzp1 v1.8h, v29.8h, v1.8h
1047; CHECK-GI-NEXT:    sdiv w27, w23, w27
1048; CHECK-GI-NEXT:    mov w23, v18.s[3]
1049; CHECK-GI-NEXT:    sshll2 v18.4s, v17.8h, #0
1050; CHECK-GI-NEXT:    mov v26.s[1], w26
1051; CHECK-GI-NEXT:    sshll v17.4s, v17.4h, #0
1052; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
1053; CHECK-GI-NEXT:    mov w11, v18.s[2]
1054; CHECK-GI-NEXT:    mov w9, v18.s[3]
1055; CHECK-GI-NEXT:    sdiv w23, w23, w28
1056; CHECK-GI-NEXT:    fmov w28, s18
1057; CHECK-GI-NEXT:    mov v26.s[2], w27
1058; CHECK-GI-NEXT:    sdiv w28, w28, w29
1059; CHECK-GI-NEXT:    mov w29, v18.s[1]
1060; CHECK-GI-NEXT:    mov v26.s[3], w23
1061; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
1062; CHECK-GI-NEXT:    mls v17.4s, v26.4s, v16.4s
1063; CHECK-GI-NEXT:    sdiv w29, w29, w30
1064; CHECK-GI-NEXT:    mov v27.s[0], w28
1065; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
1066; CHECK-GI-NEXT:    sdiv w10, w11, w15
1067; CHECK-GI-NEXT:    mov w11, v19.s[3]
1068; CHECK-GI-NEXT:    mov v27.s[1], w29
1069; CHECK-GI-NEXT:    sdiv w8, w9, w11
1070; CHECK-GI-NEXT:    mov v27.s[2], w10
1071; CHECK-GI-NEXT:    mov v27.s[3], w8
1072; CHECK-GI-NEXT:    mls v18.4s, v27.4s, v19.4s
1073; CHECK-GI-NEXT:    uzp1 v3.8h, v17.8h, v18.8h
1074; CHECK-GI-NEXT:    uzp1 v1.16b, v1.16b, v3.16b
1075; CHECK-GI-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
1076; CHECK-GI-NEXT:    ret
1077entry:
1078  %s = srem <32 x i8> %d, %e
1079  ret <32 x i8> %s
1080}
1081
; uv2i8 - unsigned remainder (urem) of two <2 x i8> vectors.
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file), so regenerate them rather than
; editing them by hand. SD-prefixed checks belong to the llc RUN line without
; -global-isel, GI-prefixed checks to the RUN line with -global-isel.
; Both expected sequences mask every 32-bit lane down to its low byte, issue
; one udiv per lane and then form the remainder as
; dividend - quotient * divisor (scalar msub in the SD output, one vector mls
; in the GI output).
1082define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
1083; CHECK-SD-LABEL: uv2i8:
1084; CHECK-SD:       // %bb.0: // %entry
1085; CHECK-SD-NEXT:    movi d2, #0x0000ff000000ff
1086; CHECK-SD-NEXT:    and v0.8b, v0.8b, v2.8b
1087; CHECK-SD-NEXT:    and v1.8b, v1.8b, v2.8b
1088; CHECK-SD-NEXT:    fmov w8, s1
1089; CHECK-SD-NEXT:    fmov w9, s0
1090; CHECK-SD-NEXT:    mov w11, v1.s[1]
1091; CHECK-SD-NEXT:    mov w12, v0.s[1]
1092; CHECK-SD-NEXT:    udiv w10, w9, w8
1093; CHECK-SD-NEXT:    udiv w13, w12, w11
1094; CHECK-SD-NEXT:    msub w8, w10, w8, w9
1095; CHECK-SD-NEXT:    fmov s0, w8
1096; CHECK-SD-NEXT:    msub w9, w13, w11, w12
1097; CHECK-SD-NEXT:    mov v0.s[1], w9
1098; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1099; CHECK-SD-NEXT:    ret
1100;
1101; CHECK-GI-LABEL: uv2i8:
1102; CHECK-GI:       // %bb.0: // %entry
1103; CHECK-GI-NEXT:    movi d2, #0x0000ff000000ff
1104; CHECK-GI-NEXT:    and v0.8b, v0.8b, v2.8b
1105; CHECK-GI-NEXT:    and v1.8b, v1.8b, v2.8b
1106; CHECK-GI-NEXT:    fmov w8, s0
1107; CHECK-GI-NEXT:    fmov w9, s1
1108; CHECK-GI-NEXT:    mov w10, v1.s[1]
1109; CHECK-GI-NEXT:    udiv w8, w8, w9
1110; CHECK-GI-NEXT:    mov w9, v0.s[1]
1111; CHECK-GI-NEXT:    udiv w9, w9, w10
1112; CHECK-GI-NEXT:    mov v2.s[0], w8
1113; CHECK-GI-NEXT:    mov v2.s[1], w9
1114; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
1115; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
1116; CHECK-GI-NEXT:    ret
1117entry:
1118  %s = urem <2 x i8> %d, %e
1119  ret <2 x i8> %s
1120}
1121
; uv3i8 - unsigned remainder (urem) of two <3 x i8> vectors.
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file), so regenerate them rather than
; editing them by hand. SD-prefixed checks belong to the llc RUN line without
; -global-isel, GI-prefixed checks to the RUN line with -global-isel.
; Both expected sequences are fully scalar. The three dividend lanes are read
; from w0-w2 and the three divisor lanes from w3-w5, each is zero-extended
; with an "and ..., #0xff", and every lane gets one udiv followed by an msub
; that writes the remainder back to w0-w2.
1122define <3 x i8> @uv3i8(<3 x i8> %d, <3 x i8> %e) {
1123; CHECK-SD-LABEL: uv3i8:
1124; CHECK-SD:       // %bb.0: // %entry
1125; CHECK-SD-NEXT:    and w8, w3, #0xff
1126; CHECK-SD-NEXT:    and w9, w0, #0xff
1127; CHECK-SD-NEXT:    and w11, w4, #0xff
1128; CHECK-SD-NEXT:    and w12, w1, #0xff
1129; CHECK-SD-NEXT:    and w14, w5, #0xff
1130; CHECK-SD-NEXT:    and w15, w2, #0xff
1131; CHECK-SD-NEXT:    udiv w10, w9, w8
1132; CHECK-SD-NEXT:    udiv w13, w12, w11
1133; CHECK-SD-NEXT:    msub w0, w10, w8, w9
1134; CHECK-SD-NEXT:    udiv w16, w15, w14
1135; CHECK-SD-NEXT:    msub w1, w13, w11, w12
1136; CHECK-SD-NEXT:    msub w2, w16, w14, w15
1137; CHECK-SD-NEXT:    ret
1138;
1139; CHECK-GI-LABEL: uv3i8:
1140; CHECK-GI:       // %bb.0: // %entry
1141; CHECK-GI-NEXT:    and w8, w0, #0xff
1142; CHECK-GI-NEXT:    and w9, w3, #0xff
1143; CHECK-GI-NEXT:    and w11, w1, #0xff
1144; CHECK-GI-NEXT:    and w12, w4, #0xff
1145; CHECK-GI-NEXT:    and w14, w2, #0xff
1146; CHECK-GI-NEXT:    and w15, w5, #0xff
1147; CHECK-GI-NEXT:    udiv w10, w8, w9
1148; CHECK-GI-NEXT:    udiv w13, w11, w12
1149; CHECK-GI-NEXT:    msub w0, w10, w9, w8
1150; CHECK-GI-NEXT:    udiv w16, w14, w15
1151; CHECK-GI-NEXT:    msub w1, w13, w12, w11
1152; CHECK-GI-NEXT:    msub w2, w16, w15, w14
1153; CHECK-GI-NEXT:    ret
1154entry:
1155  %s = urem <3 x i8> %d, %e
1156  ret <3 x i8> %s
1157}
1158
; uv4i8 - unsigned remainder (urem) of two <4 x i8> vectors.
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file), so regenerate them rather than
; editing them by hand. SD-prefixed checks belong to the llc RUN line without
; -global-isel, GI-prefixed checks to the RUN line with -global-isel.
; SD output - clears the upper byte of every 16-bit lane with bic, moves each
; lane to a GPR with umov, performs four udiv/msub pairs and re-inserts the
; remainders lane by lane into v0.
; GI output - widens the lanes to 32 bits with ushll, masks them to the low
; byte, performs four scalar udiv, rebuilds the quotient vector in v2, derives
; all remainders with a single mls and narrows the result with xtn.
1159define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
1160; CHECK-SD-LABEL: uv4i8:
1161; CHECK-SD:       // %bb.0: // %entry
1162; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
1163; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1164; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
1165; CHECK-SD-NEXT:    bic v1.4h, #255, lsl #8
1166; CHECK-SD-NEXT:    umov w11, v1.h[0]
1167; CHECK-SD-NEXT:    umov w12, v0.h[0]
1168; CHECK-SD-NEXT:    umov w8, v1.h[1]
1169; CHECK-SD-NEXT:    umov w9, v0.h[1]
1170; CHECK-SD-NEXT:    umov w14, v1.h[2]
1171; CHECK-SD-NEXT:    umov w15, v0.h[2]
1172; CHECK-SD-NEXT:    umov w17, v1.h[3]
1173; CHECK-SD-NEXT:    umov w18, v0.h[3]
1174; CHECK-SD-NEXT:    udiv w13, w12, w11
1175; CHECK-SD-NEXT:    udiv w10, w9, w8
1176; CHECK-SD-NEXT:    msub w11, w13, w11, w12
1177; CHECK-SD-NEXT:    fmov s0, w11
1178; CHECK-SD-NEXT:    udiv w16, w15, w14
1179; CHECK-SD-NEXT:    msub w8, w10, w8, w9
1180; CHECK-SD-NEXT:    mov v0.h[1], w8
1181; CHECK-SD-NEXT:    udiv w9, w18, w17
1182; CHECK-SD-NEXT:    msub w8, w16, w14, w15
1183; CHECK-SD-NEXT:    mov v0.h[2], w8
1184; CHECK-SD-NEXT:    msub w8, w9, w17, w18
1185; CHECK-SD-NEXT:    mov v0.h[3], w8
1186; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
1187; CHECK-SD-NEXT:    ret
1188;
1189; CHECK-GI-LABEL: uv4i8:
1190; CHECK-GI:       // %bb.0: // %entry
1191; CHECK-GI-NEXT:    movi v2.2d, #0x0000ff000000ff
1192; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
1193; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
1194; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
1195; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
1196; CHECK-GI-NEXT:    fmov w8, s0
1197; CHECK-GI-NEXT:    fmov w9, s1
1198; CHECK-GI-NEXT:    mov w10, v1.s[1]
1199; CHECK-GI-NEXT:    mov w11, v1.s[2]
1200; CHECK-GI-NEXT:    mov w12, v1.s[3]
1201; CHECK-GI-NEXT:    udiv w8, w8, w9
1202; CHECK-GI-NEXT:    mov w9, v0.s[1]
1203; CHECK-GI-NEXT:    udiv w9, w9, w10
1204; CHECK-GI-NEXT:    mov w10, v0.s[2]
1205; CHECK-GI-NEXT:    mov v2.s[0], w8
1206; CHECK-GI-NEXT:    udiv w10, w10, w11
1207; CHECK-GI-NEXT:    mov w11, v0.s[3]
1208; CHECK-GI-NEXT:    mov v2.s[1], w9
1209; CHECK-GI-NEXT:    udiv w8, w11, w12
1210; CHECK-GI-NEXT:    mov v2.s[2], w10
1211; CHECK-GI-NEXT:    mov v2.s[3], w8
1212; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
1213; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
1214; CHECK-GI-NEXT:    ret
1215entry:
1216  %s = urem <4 x i8> %d, %e
1217  ret <4 x i8> %s
1218}
1219
; uv8i8 - unsigned remainder (urem) of two <8 x i8> vectors.
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file), so regenerate them rather than
; editing them by hand. SD-prefixed checks belong to the llc RUN line without
; -global-isel, GI-prefixed checks to the RUN line with -global-isel.
; SD output - extracts every byte lane with umov, performs eight udiv/msub
; pairs, assembles the remainders in v2 and copies the result to d0.
; GI output - zero-extends the lanes to 32 bits in two ushll/ushll2 steps,
; performs eight scalar udiv, rebuilds the quotients in v4 and v5, derives the
; remainders with two mls and narrows back to bytes with uzp1 plus xtn.
1220define <8 x i8> @uv8i8(<8 x i8> %d, <8 x i8> %e) {
1221; CHECK-SD-LABEL: uv8i8:
1222; CHECK-SD:       // %bb.0: // %entry
1223; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
1224; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
1225; CHECK-SD-NEXT:    umov w11, v1.b[0]
1226; CHECK-SD-NEXT:    umov w12, v0.b[0]
1227; CHECK-SD-NEXT:    umov w8, v1.b[1]
1228; CHECK-SD-NEXT:    umov w9, v0.b[1]
1229; CHECK-SD-NEXT:    umov w14, v1.b[2]
1230; CHECK-SD-NEXT:    umov w15, v0.b[2]
1231; CHECK-SD-NEXT:    umov w17, v1.b[3]
1232; CHECK-SD-NEXT:    umov w18, v0.b[3]
1233; CHECK-SD-NEXT:    umov w1, v1.b[4]
1234; CHECK-SD-NEXT:    umov w2, v0.b[4]
1235; CHECK-SD-NEXT:    umov w4, v1.b[5]
1236; CHECK-SD-NEXT:    umov w5, v0.b[5]
1237; CHECK-SD-NEXT:    udiv w13, w12, w11
1238; CHECK-SD-NEXT:    udiv w10, w9, w8
1239; CHECK-SD-NEXT:    msub w11, w13, w11, w12
1240; CHECK-SD-NEXT:    umov w13, v1.b[7]
1241; CHECK-SD-NEXT:    fmov s2, w11
1242; CHECK-SD-NEXT:    umov w11, v0.b[6]
1243; CHECK-SD-NEXT:    udiv w16, w15, w14
1244; CHECK-SD-NEXT:    msub w8, w10, w8, w9
1245; CHECK-SD-NEXT:    umov w10, v1.b[6]
1246; CHECK-SD-NEXT:    mov v2.b[1], w8
1247; CHECK-SD-NEXT:    udiv w0, w18, w17
1248; CHECK-SD-NEXT:    msub w8, w16, w14, w15
1249; CHECK-SD-NEXT:    umov w14, v0.b[7]
1250; CHECK-SD-NEXT:    mov v2.b[2], w8
1251; CHECK-SD-NEXT:    udiv w3, w2, w1
1252; CHECK-SD-NEXT:    msub w8, w0, w17, w18
1253; CHECK-SD-NEXT:    mov v2.b[3], w8
1254; CHECK-SD-NEXT:    udiv w9, w5, w4
1255; CHECK-SD-NEXT:    msub w8, w3, w1, w2
1256; CHECK-SD-NEXT:    mov v2.b[4], w8
1257; CHECK-SD-NEXT:    udiv w12, w11, w10
1258; CHECK-SD-NEXT:    msub w8, w9, w4, w5
1259; CHECK-SD-NEXT:    mov v2.b[5], w8
1260; CHECK-SD-NEXT:    udiv w9, w14, w13
1261; CHECK-SD-NEXT:    msub w8, w12, w10, w11
1262; CHECK-SD-NEXT:    mov v2.b[6], w8
1263; CHECK-SD-NEXT:    msub w8, w9, w13, w14
1264; CHECK-SD-NEXT:    mov v2.b[7], w8
1265; CHECK-SD-NEXT:    fmov d0, d2
1266; CHECK-SD-NEXT:    ret
1267;
1268; CHECK-GI-LABEL: uv8i8:
1269; CHECK-GI:       // %bb.0: // %entry
1270; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
1271; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
1272; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
1273; CHECK-GI-NEXT:    ushll v3.4s, v1.4h, #0
1274; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
1275; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
1276; CHECK-GI-NEXT:    fmov w8, s2
1277; CHECK-GI-NEXT:    fmov w9, s3
1278; CHECK-GI-NEXT:    mov w10, v3.s[1]
1279; CHECK-GI-NEXT:    mov w11, v3.s[2]
1280; CHECK-GI-NEXT:    mov w12, v3.s[3]
1281; CHECK-GI-NEXT:    fmov w13, s1
1282; CHECK-GI-NEXT:    mov w14, v1.s[1]
1283; CHECK-GI-NEXT:    mov w15, v1.s[2]
1284; CHECK-GI-NEXT:    udiv w8, w8, w9
1285; CHECK-GI-NEXT:    mov w9, v2.s[1]
1286; CHECK-GI-NEXT:    udiv w9, w9, w10
1287; CHECK-GI-NEXT:    mov w10, v2.s[2]
1288; CHECK-GI-NEXT:    mov v4.s[0], w8
1289; CHECK-GI-NEXT:    mov w8, v0.s[3]
1290; CHECK-GI-NEXT:    udiv w10, w10, w11
1291; CHECK-GI-NEXT:    mov w11, v2.s[3]
1292; CHECK-GI-NEXT:    mov v4.s[1], w9
1293; CHECK-GI-NEXT:    udiv w11, w11, w12
1294; CHECK-GI-NEXT:    fmov w12, s0
1295; CHECK-GI-NEXT:    mov v4.s[2], w10
1296; CHECK-GI-NEXT:    udiv w12, w12, w13
1297; CHECK-GI-NEXT:    mov w13, v0.s[1]
1298; CHECK-GI-NEXT:    mov v4.s[3], w11
1299; CHECK-GI-NEXT:    mls v2.4s, v4.4s, v3.4s
1300; CHECK-GI-NEXT:    udiv w13, w13, w14
1301; CHECK-GI-NEXT:    mov w14, v0.s[2]
1302; CHECK-GI-NEXT:    mov v5.s[0], w12
1303; CHECK-GI-NEXT:    mov w12, v1.s[3]
1304; CHECK-GI-NEXT:    udiv w14, w14, w15
1305; CHECK-GI-NEXT:    mov v5.s[1], w13
1306; CHECK-GI-NEXT:    udiv w8, w8, w12
1307; CHECK-GI-NEXT:    mov v5.s[2], w14
1308; CHECK-GI-NEXT:    mov v5.s[3], w8
1309; CHECK-GI-NEXT:    mls v0.4s, v5.4s, v1.4s
1310; CHECK-GI-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
1311; CHECK-GI-NEXT:    xtn v0.8b, v0.8h
1312; CHECK-GI-NEXT:    ret
1313entry:
1314  %s = urem <8 x i8> %d, %e
1315  ret <8 x i8> %s
1316}
1317
; uv16i8 - unsigned remainder (urem) of two <16 x i8> vectors.
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the NOTE at the top of the file), so regenerate them rather than
; editing them by hand. SD-prefixed checks belong to the llc RUN line without
; -global-isel, GI-prefixed checks to the RUN line with -global-isel.
; SD output - spills x19-x28 in an 80-byte frame because the sequence also
; uses w19-w28 as scratch, extracts every byte lane with umov, performs
; sixteen udiv/msub pairs, assembles the remainders in v2 and copies them to
; v0 before the registers are reloaded.
; GI output - needs no stack frame. It zero-extends the lanes to 32 bits with
; ushll/ushll2, performs sixteen scalar udiv, rebuilds the quotients in
; v16-v19, derives the remainders with four mls and packs them back to bytes
; with three uzp1.
1318define <16 x i8> @uv16i8(<16 x i8> %d, <16 x i8> %e) {
1319; CHECK-SD-LABEL: uv16i8:
1320; CHECK-SD:       // %bb.0: // %entry
1321; CHECK-SD-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
1322; CHECK-SD-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
1323; CHECK-SD-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
1324; CHECK-SD-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
1325; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
1326; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
1327; CHECK-SD-NEXT:    .cfi_offset w19, -8
1328; CHECK-SD-NEXT:    .cfi_offset w20, -16
1329; CHECK-SD-NEXT:    .cfi_offset w21, -24
1330; CHECK-SD-NEXT:    .cfi_offset w22, -32
1331; CHECK-SD-NEXT:    .cfi_offset w23, -40
1332; CHECK-SD-NEXT:    .cfi_offset w24, -48
1333; CHECK-SD-NEXT:    .cfi_offset w25, -56
1334; CHECK-SD-NEXT:    .cfi_offset w26, -64
1335; CHECK-SD-NEXT:    .cfi_offset w27, -72
1336; CHECK-SD-NEXT:    .cfi_offset w28, -80
1337; CHECK-SD-NEXT:    umov w11, v1.b[0]
1338; CHECK-SD-NEXT:    umov w12, v0.b[0]
1339; CHECK-SD-NEXT:    umov w8, v1.b[1]
1340; CHECK-SD-NEXT:    umov w9, v0.b[1]
1341; CHECK-SD-NEXT:    umov w14, v1.b[2]
1342; CHECK-SD-NEXT:    umov w15, v0.b[2]
1343; CHECK-SD-NEXT:    umov w17, v1.b[3]
1344; CHECK-SD-NEXT:    umov w18, v0.b[3]
1345; CHECK-SD-NEXT:    umov w1, v1.b[4]
1346; CHECK-SD-NEXT:    umov w2, v0.b[4]
1347; CHECK-SD-NEXT:    umov w4, v1.b[5]
1348; CHECK-SD-NEXT:    umov w5, v0.b[5]
1349; CHECK-SD-NEXT:    udiv w13, w12, w11
1350; CHECK-SD-NEXT:    umov w7, v1.b[6]
1351; CHECK-SD-NEXT:    umov w19, v0.b[6]
1352; CHECK-SD-NEXT:    umov w21, v1.b[7]
1353; CHECK-SD-NEXT:    umov w22, v0.b[7]
1354; CHECK-SD-NEXT:    umov w24, v1.b[8]
1355; CHECK-SD-NEXT:    umov w25, v0.b[8]
1356; CHECK-SD-NEXT:    umov w27, v1.b[9]
1357; CHECK-SD-NEXT:    umov w28, v0.b[9]
1358; CHECK-SD-NEXT:    udiv w10, w9, w8
1359; CHECK-SD-NEXT:    msub w11, w13, w11, w12
1360; CHECK-SD-NEXT:    umov w13, v1.b[11]
1361; CHECK-SD-NEXT:    fmov s2, w11
1362; CHECK-SD-NEXT:    umov w11, v0.b[10]
1363; CHECK-SD-NEXT:    udiv w16, w15, w14
1364; CHECK-SD-NEXT:    msub w8, w10, w8, w9
1365; CHECK-SD-NEXT:    umov w10, v1.b[10]
1366; CHECK-SD-NEXT:    mov v2.b[1], w8
1367; CHECK-SD-NEXT:    udiv w0, w18, w17
1368; CHECK-SD-NEXT:    msub w8, w16, w14, w15
1369; CHECK-SD-NEXT:    umov w14, v0.b[11]
1370; CHECK-SD-NEXT:    umov w16, v1.b[12]
1371; CHECK-SD-NEXT:    mov v2.b[2], w8
1372; CHECK-SD-NEXT:    udiv w3, w2, w1
1373; CHECK-SD-NEXT:    msub w8, w0, w17, w18
1374; CHECK-SD-NEXT:    umov w17, v0.b[12]
1375; CHECK-SD-NEXT:    umov w0, v1.b[13]
1376; CHECK-SD-NEXT:    mov v2.b[3], w8
1377; CHECK-SD-NEXT:    udiv w6, w5, w4
1378; CHECK-SD-NEXT:    msub w8, w3, w1, w2
1379; CHECK-SD-NEXT:    umov w1, v0.b[13]
1380; CHECK-SD-NEXT:    mov v2.b[4], w8
1381; CHECK-SD-NEXT:    udiv w20, w19, w7
1382; CHECK-SD-NEXT:    msub w8, w6, w4, w5
1383; CHECK-SD-NEXT:    mov v2.b[5], w8
1384; CHECK-SD-NEXT:    udiv w23, w22, w21
1385; CHECK-SD-NEXT:    msub w8, w20, w7, w19
1386; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
1387; CHECK-SD-NEXT:    mov v2.b[6], w8
1388; CHECK-SD-NEXT:    udiv w26, w25, w24
1389; CHECK-SD-NEXT:    msub w8, w23, w21, w22
1390; CHECK-SD-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
1391; CHECK-SD-NEXT:    mov v2.b[7], w8
1392; CHECK-SD-NEXT:    udiv w9, w28, w27
1393; CHECK-SD-NEXT:    msub w8, w26, w24, w25
1394; CHECK-SD-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
1395; CHECK-SD-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
1396; CHECK-SD-NEXT:    mov v2.b[8], w8
1397; CHECK-SD-NEXT:    udiv w12, w11, w10
1398; CHECK-SD-NEXT:    msub w8, w9, w27, w28
1399; CHECK-SD-NEXT:    mov v2.b[9], w8
1400; CHECK-SD-NEXT:    udiv w15, w14, w13
1401; CHECK-SD-NEXT:    msub w8, w12, w10, w11
1402; CHECK-SD-NEXT:    umov w10, v1.b[14]
1403; CHECK-SD-NEXT:    umov w11, v0.b[14]
1404; CHECK-SD-NEXT:    mov v2.b[10], w8
1405; CHECK-SD-NEXT:    udiv w18, w17, w16
1406; CHECK-SD-NEXT:    msub w8, w15, w13, w14
1407; CHECK-SD-NEXT:    umov w13, v1.b[15]
1408; CHECK-SD-NEXT:    umov w14, v0.b[15]
1409; CHECK-SD-NEXT:    mov v2.b[11], w8
1410; CHECK-SD-NEXT:    udiv w9, w1, w0
1411; CHECK-SD-NEXT:    msub w8, w18, w16, w17
1412; CHECK-SD-NEXT:    mov v2.b[12], w8
1413; CHECK-SD-NEXT:    udiv w12, w11, w10
1414; CHECK-SD-NEXT:    msub w8, w9, w0, w1
1415; CHECK-SD-NEXT:    mov v2.b[13], w8
1416; CHECK-SD-NEXT:    udiv w9, w14, w13
1417; CHECK-SD-NEXT:    msub w8, w12, w10, w11
1418; CHECK-SD-NEXT:    mov v2.b[14], w8
1419; CHECK-SD-NEXT:    msub w8, w9, w13, w14
1420; CHECK-SD-NEXT:    mov v2.b[15], w8
1421; CHECK-SD-NEXT:    mov v0.16b, v2.16b
1422; CHECK-SD-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
1423; CHECK-SD-NEXT:    ret
1424;
1425; CHECK-GI-LABEL: uv16i8:
1426; CHECK-GI:       // %bb.0: // %entry
1427; CHECK-GI-NEXT:    ushll v4.8h, v0.8b, #0
1428; CHECK-GI-NEXT:    ushll v5.8h, v1.8b, #0
1429; CHECK-GI-NEXT:    ushll2 v6.8h, v0.16b, #0
1430; CHECK-GI-NEXT:    ushll2 v7.8h, v1.16b, #0
1431; CHECK-GI-NEXT:    ushll v2.4s, v4.4h, #0
1432; CHECK-GI-NEXT:    ushll v3.4s, v5.4h, #0
1433; CHECK-GI-NEXT:    ushll2 v4.4s, v4.8h, #0
1434; CHECK-GI-NEXT:    ushll2 v5.4s, v5.8h, #0
1435; CHECK-GI-NEXT:    ushll v0.4s, v6.4h, #0
1436; CHECK-GI-NEXT:    ushll v1.4s, v7.4h, #0
1437; CHECK-GI-NEXT:    ushll2 v6.4s, v6.8h, #0
1438; CHECK-GI-NEXT:    ushll2 v7.4s, v7.8h, #0
1439; CHECK-GI-NEXT:    fmov w8, s2
1440; CHECK-GI-NEXT:    fmov w9, s3
1441; CHECK-GI-NEXT:    mov w12, v3.s[3]
1442; CHECK-GI-NEXT:    fmov w13, s5
1443; CHECK-GI-NEXT:    mov w16, v5.s[3]
1444; CHECK-GI-NEXT:    fmov w17, s1
1445; CHECK-GI-NEXT:    mov w18, v1.s[1]
1446; CHECK-GI-NEXT:    mov w0, v1.s[2]
1447; CHECK-GI-NEXT:    mov w1, v1.s[3]
1448; CHECK-GI-NEXT:    udiv w11, w8, w9
1449; CHECK-GI-NEXT:    mov w8, v2.s[1]
1450; CHECK-GI-NEXT:    mov w9, v3.s[1]
1451; CHECK-GI-NEXT:    fmov w2, s7
1452; CHECK-GI-NEXT:    mov w3, v7.s[1]
1453; CHECK-GI-NEXT:    mov w4, v7.s[2]
1454; CHECK-GI-NEXT:    udiv w10, w8, w9
1455; CHECK-GI-NEXT:    mov w8, v2.s[2]
1456; CHECK-GI-NEXT:    mov w9, v3.s[2]
1457; CHECK-GI-NEXT:    mov v16.s[0], w11
1458; CHECK-GI-NEXT:    mov w11, v6.s[3]
1459; CHECK-GI-NEXT:    udiv w9, w8, w9
1460; CHECK-GI-NEXT:    mov w8, v2.s[3]
1461; CHECK-GI-NEXT:    mov v16.s[1], w10
1462; CHECK-GI-NEXT:    udiv w8, w8, w12
1463; CHECK-GI-NEXT:    fmov w12, s4
1464; CHECK-GI-NEXT:    mov v16.s[2], w9
1465; CHECK-GI-NEXT:    udiv w14, w12, w13
1466; CHECK-GI-NEXT:    mov w12, v4.s[1]
1467; CHECK-GI-NEXT:    mov w13, v5.s[1]
1468; CHECK-GI-NEXT:    mov v16.s[3], w8
1469; CHECK-GI-NEXT:    mls v2.4s, v16.4s, v3.4s
1470; CHECK-GI-NEXT:    udiv w15, w12, w13
1471; CHECK-GI-NEXT:    mov w12, v4.s[2]
1472; CHECK-GI-NEXT:    mov w13, v5.s[2]
1473; CHECK-GI-NEXT:    mov v17.s[0], w14
1474; CHECK-GI-NEXT:    mov w14, v7.s[3]
1475; CHECK-GI-NEXT:    udiv w13, w12, w13
1476; CHECK-GI-NEXT:    mov w12, v4.s[3]
1477; CHECK-GI-NEXT:    mov v17.s[1], w15
1478; CHECK-GI-NEXT:    udiv w12, w12, w16
1479; CHECK-GI-NEXT:    fmov w16, s0
1480; CHECK-GI-NEXT:    mov v17.s[2], w13
1481; CHECK-GI-NEXT:    udiv w16, w16, w17
1482; CHECK-GI-NEXT:    mov w17, v0.s[1]
1483; CHECK-GI-NEXT:    mov v17.s[3], w12
1484; CHECK-GI-NEXT:    mls v4.4s, v17.4s, v5.4s
1485; CHECK-GI-NEXT:    udiv w17, w17, w18
1486; CHECK-GI-NEXT:    mov w18, v0.s[2]
1487; CHECK-GI-NEXT:    mov v18.s[0], w16
1488; CHECK-GI-NEXT:    udiv w18, w18, w0
1489; CHECK-GI-NEXT:    mov w0, v0.s[3]
1490; CHECK-GI-NEXT:    mov v18.s[1], w17
1491; CHECK-GI-NEXT:    udiv w0, w0, w1
1492; CHECK-GI-NEXT:    fmov w1, s6
1493; CHECK-GI-NEXT:    mov v18.s[2], w18
1494; CHECK-GI-NEXT:    udiv w1, w1, w2
1495; CHECK-GI-NEXT:    mov w2, v6.s[1]
1496; CHECK-GI-NEXT:    mov v18.s[3], w0
1497; CHECK-GI-NEXT:    mls v0.4s, v18.4s, v1.4s
1498; CHECK-GI-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
1499; CHECK-GI-NEXT:    udiv w2, w2, w3
1500; CHECK-GI-NEXT:    mov w3, v6.s[2]
1501; CHECK-GI-NEXT:    mov v19.s[0], w1
1502; CHECK-GI-NEXT:    udiv w3, w3, w4
1503; CHECK-GI-NEXT:    mov v19.s[1], w2
1504; CHECK-GI-NEXT:    udiv w10, w11, w14
1505; CHECK-GI-NEXT:    mov v19.s[2], w3
1506; CHECK-GI-NEXT:    mov v19.s[3], w10
1507; CHECK-GI-NEXT:    mls v6.4s, v19.4s, v7.4s
1508; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v6.8h
1509; CHECK-GI-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
1510; CHECK-GI-NEXT:    ret
1511entry:
1512  %s = urem <16 x i8> %d, %e
1513  ret <16 x i8> %s
1514}
1515
; urem of two <32 x i8> vectors (dividend in v0/v1, divisor in v2/v3, result
; returned in v0/v1). The assertions below are autogenerated (see the NOTE at
; the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
; SelectionDAG expectation: every byte lane is moved to a GPR with umov,
; divided with a scalar udiv, turned into a remainder with msub and inserted
; back with mov v.b[n]; intermediate values are spilled into a 304-byte frame.
; GlobalISel expectation: lanes are widened to 32 bits (ushll/ushll2), each
; lane is divided with a scalar udiv, the remainders are formed four lanes at
; a time with mls, and uzp1 narrows the results back to bytes.
1516define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) {
1517; CHECK-SD-LABEL: uv32i8:
1518; CHECK-SD:       // %bb.0: // %entry
1519; CHECK-SD-NEXT:    sub sp, sp, #304
1520; CHECK-SD-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
1521; CHECK-SD-NEXT:    stp x28, x27, [sp, #224] // 16-byte Folded Spill
1522; CHECK-SD-NEXT:    stp x26, x25, [sp, #240] // 16-byte Folded Spill
1523; CHECK-SD-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
1524; CHECK-SD-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
1525; CHECK-SD-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
1526; CHECK-SD-NEXT:    .cfi_def_cfa_offset 304
1527; CHECK-SD-NEXT:    .cfi_offset w19, -8
1528; CHECK-SD-NEXT:    .cfi_offset w20, -16
1529; CHECK-SD-NEXT:    .cfi_offset w21, -24
1530; CHECK-SD-NEXT:    .cfi_offset w22, -32
1531; CHECK-SD-NEXT:    .cfi_offset w23, -40
1532; CHECK-SD-NEXT:    .cfi_offset w24, -48
1533; CHECK-SD-NEXT:    .cfi_offset w25, -56
1534; CHECK-SD-NEXT:    .cfi_offset w26, -64
1535; CHECK-SD-NEXT:    .cfi_offset w27, -72
1536; CHECK-SD-NEXT:    .cfi_offset w28, -80
1537; CHECK-SD-NEXT:    .cfi_offset w30, -88
1538; CHECK-SD-NEXT:    .cfi_offset w29, -96
1539; CHECK-SD-NEXT:    umov w8, v2.b[1]
1540; CHECK-SD-NEXT:    umov w9, v0.b[1]
1541; CHECK-SD-NEXT:    umov w19, v3.b[7]
1542; CHECK-SD-NEXT:    umov w7, v1.b[7]
1543; CHECK-SD-NEXT:    umov w6, v3.b[8]
1544; CHECK-SD-NEXT:    umov w3, v1.b[8]
1545; CHECK-SD-NEXT:    umov w13, v3.b[0]
1546; CHECK-SD-NEXT:    umov w5, v3.b[1]
1547; CHECK-SD-NEXT:    umov w0, v1.b[1]
1548; CHECK-SD-NEXT:    umov w12, v3.b[2]
1549; CHECK-SD-NEXT:    umov w17, v3.b[3]
1550; CHECK-SD-NEXT:    umov w16, v1.b[3]
1551; CHECK-SD-NEXT:    str w8, [sp, #80] // 4-byte Folded Spill
1552; CHECK-SD-NEXT:    udiv w10, w9, w8
1553; CHECK-SD-NEXT:    umov w8, v2.b[0]
1554; CHECK-SD-NEXT:    str w9, [sp, #88] // 4-byte Folded Spill
1555; CHECK-SD-NEXT:    umov w9, v0.b[0]
1556; CHECK-SD-NEXT:    ldr w30, [sp, #80] // 4-byte Folded Reload
1557; CHECK-SD-NEXT:    umov w15, v3.b[4]
1558; CHECK-SD-NEXT:    umov w14, v1.b[4]
1559; CHECK-SD-NEXT:    umov w4, v3.b[5]
1560; CHECK-SD-NEXT:    umov w1, v1.b[5]
1561; CHECK-SD-NEXT:    umov w2, v3.b[6]
1562; CHECK-SD-NEXT:    umov w18, v1.b[6]
1563; CHECK-SD-NEXT:    str w8, [sp, #32] // 4-byte Folded Spill
1564; CHECK-SD-NEXT:    umov w21, v3.b[9]
1565; CHECK-SD-NEXT:    umov w20, v1.b[9]
1566; CHECK-SD-NEXT:    str w9, [sp, #40] // 4-byte Folded Spill
1567; CHECK-SD-NEXT:    ldr w29, [sp, #32] // 4-byte Folded Reload
1568; CHECK-SD-NEXT:    udiv w11, w9, w8
1569; CHECK-SD-NEXT:    umov w8, v2.b[2]
1570; CHECK-SD-NEXT:    umov w9, v0.b[2]
1571; CHECK-SD-NEXT:    str w10, [sp, #96] // 4-byte Folded Spill
1572; CHECK-SD-NEXT:    stp w8, w9, [sp, #16] // 8-byte Folded Spill
1573; CHECK-SD-NEXT:    udiv w10, w9, w8
1574; CHECK-SD-NEXT:    umov w8, v2.b[3]
1575; CHECK-SD-NEXT:    umov w9, v0.b[3]
1576; CHECK-SD-NEXT:    stp w11, w8, [sp, #48] // 8-byte Folded Spill
1577; CHECK-SD-NEXT:    str w10, [sp, #24] // 4-byte Folded Spill
1578; CHECK-SD-NEXT:    udiv w10, w9, w8
1579; CHECK-SD-NEXT:    umov w8, v2.b[4]
1580; CHECK-SD-NEXT:    str w8, [sp, #28] // 4-byte Folded Spill
1581; CHECK-SD-NEXT:    stp w9, w10, [sp, #56] // 8-byte Folded Spill
1582; CHECK-SD-NEXT:    umov w9, v0.b[4]
1583; CHECK-SD-NEXT:    udiv w27, w0, w5
1584; CHECK-SD-NEXT:    str w9, [sp, #36] // 4-byte Folded Spill
1585; CHECK-SD-NEXT:    udiv w10, w9, w8
1586; CHECK-SD-NEXT:    umov w8, v2.b[5]
1587; CHECK-SD-NEXT:    umov w9, v0.b[5]
1588; CHECK-SD-NEXT:    str w8, [sp, #76] // 4-byte Folded Spill
1589; CHECK-SD-NEXT:    str w9, [sp, #84] // 4-byte Folded Spill
1590; CHECK-SD-NEXT:    str w10, [sp, #44] // 4-byte Folded Spill
1591; CHECK-SD-NEXT:    udiv w10, w9, w8
1592; CHECK-SD-NEXT:    umov w8, v2.b[6]
1593; CHECK-SD-NEXT:    umov w9, v0.b[6]
1594; CHECK-SD-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
1595; CHECK-SD-NEXT:    str w10, [sp, #92] // 4-byte Folded Spill
1596; CHECK-SD-NEXT:    udiv w10, w9, w8
1597; CHECK-SD-NEXT:    umov w8, v2.b[7]
1598; CHECK-SD-NEXT:    umov w9, v0.b[7]
1599; CHECK-SD-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
1600; CHECK-SD-NEXT:    udiv w11, w9, w8
1601; CHECK-SD-NEXT:    umov w8, v2.b[8]
1602; CHECK-SD-NEXT:    umov w9, v0.b[8]
1603; CHECK-SD-NEXT:    str w10, [sp, #72] // 4-byte Folded Spill
1604; CHECK-SD-NEXT:    stp w8, w9, [sp, #100] // 8-byte Folded Spill
1605; CHECK-SD-NEXT:    udiv w10, w9, w8
1606; CHECK-SD-NEXT:    umov w8, v2.b[9]
1607; CHECK-SD-NEXT:    umov w9, v0.b[9]
1608; CHECK-SD-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
1609; CHECK-SD-NEXT:    str w10, [sp, #108] // 4-byte Folded Spill
1610; CHECK-SD-NEXT:    udiv w10, w9, w8
1611; CHECK-SD-NEXT:    umov w8, v2.b[10]
1612; CHECK-SD-NEXT:    umov w9, v0.b[10]
1613; CHECK-SD-NEXT:    stp w11, w8, [sp, #120] // 8-byte Folded Spill
1614; CHECK-SD-NEXT:    str w10, [sp, #144] // 4-byte Folded Spill
1615; CHECK-SD-NEXT:    udiv w10, w9, w8
1616; CHECK-SD-NEXT:    umov w8, v2.b[11]
1617; CHECK-SD-NEXT:    stp w9, w10, [sp, #128] // 8-byte Folded Spill
1618; CHECK-SD-NEXT:    umov w9, v0.b[11]
1619; CHECK-SD-NEXT:    udiv w25, w16, w17
1620; CHECK-SD-NEXT:    stp w8, w9, [sp, #172] // 8-byte Folded Spill
1621; CHECK-SD-NEXT:    udiv w11, w9, w8
1622; CHECK-SD-NEXT:    umov w8, v2.b[12]
1623; CHECK-SD-NEXT:    umov w9, v0.b[12]
1624; CHECK-SD-NEXT:    str w8, [sp, #152] // 4-byte Folded Spill
1625; CHECK-SD-NEXT:    str w9, [sp, #160] // 4-byte Folded Spill
1626; CHECK-SD-NEXT:    udiv w10, w9, w8
1627; CHECK-SD-NEXT:    umov w8, v2.b[13]
1628; CHECK-SD-NEXT:    umov w9, v0.b[13]
1629; CHECK-SD-NEXT:    stp w8, w9, [sp, #196] // 8-byte Folded Spill
1630; CHECK-SD-NEXT:    str w10, [sp, #168] // 4-byte Folded Spill
1631; CHECK-SD-NEXT:    udiv w10, w9, w8
1632; CHECK-SD-NEXT:    umov w8, v2.b[14]
1633; CHECK-SD-NEXT:    umov w9, v0.b[14]
1634; CHECK-SD-NEXT:    stp w11, w8, [sp, #180] // 8-byte Folded Spill
1635; CHECK-SD-NEXT:    umov w11, v1.b[2]
1636; CHECK-SD-NEXT:    str w10, [sp, #204] // 4-byte Folded Spill
1637; CHECK-SD-NEXT:    udiv w10, w9, w8
1638; CHECK-SD-NEXT:    umov w8, v2.b[15]
1639; CHECK-SD-NEXT:    str w8, [sp, #148] // 4-byte Folded Spill
1640; CHECK-SD-NEXT:    stp w9, w10, [sp, #188] // 8-byte Folded Spill
1641; CHECK-SD-NEXT:    umov w9, v0.b[15]
1642; CHECK-SD-NEXT:    udiv w22, w11, w12
1643; CHECK-SD-NEXT:    str w9, [sp, #156] // 4-byte Folded Spill
1644; CHECK-SD-NEXT:    udiv w10, w9, w8
1645; CHECK-SD-NEXT:    str w10, [sp, #164] // 4-byte Folded Spill
1646; CHECK-SD-NEXT:    umov w10, v1.b[0]
1647; CHECK-SD-NEXT:    udiv w9, w7, w19
1648; CHECK-SD-NEXT:    udiv w8, w3, w6
1649; CHECK-SD-NEXT:    udiv w23, w10, w13
1650; CHECK-SD-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
1651; CHECK-SD-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
1652; CHECK-SD-NEXT:    ldr w9, [sp, #88] // 4-byte Folded Reload
1653; CHECK-SD-NEXT:    msub w9, w8, w30, w9
1654; CHECK-SD-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
1655; CHECK-SD-NEXT:    ldr w30, [sp, #40] // 4-byte Folded Reload
1656; CHECK-SD-NEXT:    msub w8, w8, w29, w30
1657; CHECK-SD-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
1658; CHECK-SD-NEXT:    fmov s0, w8
1659; CHECK-SD-NEXT:    msub w10, w23, w13, w10
1660; CHECK-SD-NEXT:    udiv w24, w14, w15
1661; CHECK-SD-NEXT:    msub w13, w27, w5, w0
1662; CHECK-SD-NEXT:    ldr w5, [sp, #16] // 4-byte Folded Reload
1663; CHECK-SD-NEXT:    mov v0.b[1], w9
1664; CHECK-SD-NEXT:    msub w9, w22, w12, w11
1665; CHECK-SD-NEXT:    umov w11, v1.b[10]
1666; CHECK-SD-NEXT:    fmov s2, w10
1667; CHECK-SD-NEXT:    ldp w10, w8, [sp, #20] // 8-byte Folded Reload
1668; CHECK-SD-NEXT:    mov v2.b[1], w13
1669; CHECK-SD-NEXT:    msub w8, w8, w5, w10
1670; CHECK-SD-NEXT:    ldr w5, [sp, #52] // 4-byte Folded Reload
1671; CHECK-SD-NEXT:    umov w10, v3.b[10]
1672; CHECK-SD-NEXT:    udiv w28, w1, w4
1673; CHECK-SD-NEXT:    ldp w13, w12, [sp, #56] // 8-byte Folded Reload
1674; CHECK-SD-NEXT:    mov v2.b[2], w9
1675; CHECK-SD-NEXT:    mov v0.b[2], w8
1676; CHECK-SD-NEXT:    msub w8, w25, w17, w16
1677; CHECK-SD-NEXT:    ldr w17, [sp, #28] // 4-byte Folded Reload
1678; CHECK-SD-NEXT:    ldr w16, [sp, #36] // 4-byte Folded Reload
1679; CHECK-SD-NEXT:    msub w12, w12, w5, w13
1680; CHECK-SD-NEXT:    ldr w13, [sp, #44] // 4-byte Folded Reload
1681; CHECK-SD-NEXT:    ldr w5, [sp, #136] // 4-byte Folded Reload
1682; CHECK-SD-NEXT:    mov v2.b[3], w8
1683; CHECK-SD-NEXT:    msub w8, w24, w15, w14
1684; CHECK-SD-NEXT:    ldr w15, [sp, #92] // 4-byte Folded Reload
1685; CHECK-SD-NEXT:    mov v0.b[3], w12
1686; CHECK-SD-NEXT:    msub w13, w13, w17, w16
1687; CHECK-SD-NEXT:    ldr w17, [sp, #76] // 4-byte Folded Reload
1688; CHECK-SD-NEXT:    udiv w26, w18, w2
1689; CHECK-SD-NEXT:    ldr w16, [sp, #84] // 4-byte Folded Reload
1690; CHECK-SD-NEXT:    umov w12, v3.b[11]
1691; CHECK-SD-NEXT:    msub w15, w15, w17, w16
1692; CHECK-SD-NEXT:    umov w14, v1.b[11]
1693; CHECK-SD-NEXT:    mov v2.b[4], w8
1694; CHECK-SD-NEXT:    msub w8, w28, w4, w1
1695; CHECK-SD-NEXT:    ldr w1, [sp, #64] // 4-byte Folded Reload
1696; CHECK-SD-NEXT:    mov v0.b[4], w13
1697; CHECK-SD-NEXT:    ldr w4, [sp, #100] // 4-byte Folded Reload
1698; CHECK-SD-NEXT:    ldp w17, w16, [sp, #68] // 8-byte Folded Reload
1699; CHECK-SD-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
1700; CHECK-SD-NEXT:    mov v2.b[5], w8
1701; CHECK-SD-NEXT:    ldp x28, x27, [sp, #224] // 16-byte Folded Reload
1702; CHECK-SD-NEXT:    mov v0.b[5], w15
1703; CHECK-SD-NEXT:    msub w16, w16, w1, w17
1704; CHECK-SD-NEXT:    umov w15, v3.b[12]
1705; CHECK-SD-NEXT:    msub w8, w26, w2, w18
1706; CHECK-SD-NEXT:    ldr w2, [sp, #112] // 4-byte Folded Reload
1707; CHECK-SD-NEXT:    udiv w0, w20, w21
1708; CHECK-SD-NEXT:    ldp w1, w18, [sp, #116] // 8-byte Folded Reload
1709; CHECK-SD-NEXT:    umov w17, v1.b[12]
1710; CHECK-SD-NEXT:    ldp x26, x25, [sp, #240] // 16-byte Folded Reload
1711; CHECK-SD-NEXT:    mov v2.b[6], w8
1712; CHECK-SD-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
1713; CHECK-SD-NEXT:    mov v0.b[6], w16
1714; CHECK-SD-NEXT:    msub w18, w18, w2, w1
1715; CHECK-SD-NEXT:    msub w8, w8, w19, w7
1716; CHECK-SD-NEXT:    ldp w2, w1, [sp, #104] // 8-byte Folded Reload
1717; CHECK-SD-NEXT:    mov v0.b[7], w18
1718; CHECK-SD-NEXT:    umov w18, v3.b[13]
1719; CHECK-SD-NEXT:    mov v2.b[7], w8
1720; CHECK-SD-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
1721; CHECK-SD-NEXT:    udiv w9, w11, w10
1722; CHECK-SD-NEXT:    msub w1, w1, w4, w2
1723; CHECK-SD-NEXT:    umov w2, v1.b[13]
1724; CHECK-SD-NEXT:    msub w8, w8, w6, w3
1725; CHECK-SD-NEXT:    ldp w4, w3, [sp, #140] // 8-byte Folded Reload
1726; CHECK-SD-NEXT:    mov v0.b[8], w1
1727; CHECK-SD-NEXT:    mov v2.b[8], w8
1728; CHECK-SD-NEXT:    msub w8, w0, w21, w20
1729; CHECK-SD-NEXT:    msub w3, w3, w5, w4
1730; CHECK-SD-NEXT:    ldr w5, [sp, #124] // 4-byte Folded Reload
1731; CHECK-SD-NEXT:    ldp w4, w1, [sp, #128] // 8-byte Folded Reload
1732; CHECK-SD-NEXT:    udiv w13, w14, w12
1733; CHECK-SD-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
1734; CHECK-SD-NEXT:    mov v2.b[9], w8
1735; CHECK-SD-NEXT:    mov v0.b[9], w3
1736; CHECK-SD-NEXT:    msub w8, w9, w10, w11
1737; CHECK-SD-NEXT:    msub w1, w1, w5, w4
1738; CHECK-SD-NEXT:    ldr w4, [sp, #172] // 4-byte Folded Reload
1739; CHECK-SD-NEXT:    umov w9, v3.b[14]
1740; CHECK-SD-NEXT:    ldp w3, w11, [sp, #176] // 8-byte Folded Reload
1741; CHECK-SD-NEXT:    umov w10, v1.b[14]
1742; CHECK-SD-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
1743; CHECK-SD-NEXT:    mov v2.b[10], w8
1744; CHECK-SD-NEXT:    mov v0.b[10], w1
1745; CHECK-SD-NEXT:    ldr w1, [sp, #152] // 4-byte Folded Reload
1746; CHECK-SD-NEXT:    msub w11, w11, w4, w3
1747; CHECK-SD-NEXT:    udiv w16, w17, w15
1748; CHECK-SD-NEXT:    msub w8, w13, w12, w14
1749; CHECK-SD-NEXT:    ldr w13, [sp, #168] // 4-byte Folded Reload
1750; CHECK-SD-NEXT:    ldr w14, [sp, #160] // 4-byte Folded Reload
1751; CHECK-SD-NEXT:    mov v0.b[11], w11
1752; CHECK-SD-NEXT:    umov w11, v3.b[15]
1753; CHECK-SD-NEXT:    msub w13, w13, w1, w14
1754; CHECK-SD-NEXT:    umov w14, v1.b[15]
1755; CHECK-SD-NEXT:    mov v2.b[11], w8
1756; CHECK-SD-NEXT:    mov v0.b[12], w13
1757; CHECK-SD-NEXT:    udiv w0, w2, w18
1758; CHECK-SD-NEXT:    msub w8, w16, w15, w17
1759; CHECK-SD-NEXT:    ldr w17, [sp, #196] // 4-byte Folded Reload
1760; CHECK-SD-NEXT:    ldp w16, w15, [sp, #200] // 8-byte Folded Reload
1761; CHECK-SD-NEXT:    mov v2.b[12], w8
1762; CHECK-SD-NEXT:    msub w15, w15, w17, w16
1763; CHECK-SD-NEXT:    ldp w17, w16, [sp, #188] // 8-byte Folded Reload
1764; CHECK-SD-NEXT:    mov v0.b[13], w15
1765; CHECK-SD-NEXT:    udiv w12, w10, w9
1766; CHECK-SD-NEXT:    msub w8, w0, w18, w2
1767; CHECK-SD-NEXT:    ldr w18, [sp, #184] // 4-byte Folded Reload
1768; CHECK-SD-NEXT:    msub w16, w16, w18, w17
1769; CHECK-SD-NEXT:    mov v2.b[13], w8
1770; CHECK-SD-NEXT:    mov v0.b[14], w16
1771; CHECK-SD-NEXT:    udiv w13, w14, w11
1772; CHECK-SD-NEXT:    msub w8, w12, w9, w10
1773; CHECK-SD-NEXT:    ldr w9, [sp, #164] // 4-byte Folded Reload
1774; CHECK-SD-NEXT:    ldr w12, [sp, #148] // 4-byte Folded Reload
1775; CHECK-SD-NEXT:    ldr w10, [sp, #156] // 4-byte Folded Reload
1776; CHECK-SD-NEXT:    mov v2.b[14], w8
1777; CHECK-SD-NEXT:    msub w9, w9, w12, w10
1778; CHECK-SD-NEXT:    mov v0.b[15], w9
1779; CHECK-SD-NEXT:    msub w8, w13, w11, w14
1780; CHECK-SD-NEXT:    mov v2.b[15], w8
1781; CHECK-SD-NEXT:    mov v1.16b, v2.16b
1782; CHECK-SD-NEXT:    add sp, sp, #304
1783; CHECK-SD-NEXT:    ret
1784;
1785; CHECK-GI-LABEL: uv32i8:
1786; CHECK-GI:       // %bb.0: // %entry
1787; CHECK-GI-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
1788; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
1789; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
1790; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
1791; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
1792; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
1793; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
1794; CHECK-GI-NEXT:    .cfi_offset w19, -8
1795; CHECK-GI-NEXT:    .cfi_offset w20, -16
1796; CHECK-GI-NEXT:    .cfi_offset w21, -24
1797; CHECK-GI-NEXT:    .cfi_offset w22, -32
1798; CHECK-GI-NEXT:    .cfi_offset w23, -40
1799; CHECK-GI-NEXT:    .cfi_offset w24, -48
1800; CHECK-GI-NEXT:    .cfi_offset w25, -56
1801; CHECK-GI-NEXT:    .cfi_offset w26, -64
1802; CHECK-GI-NEXT:    .cfi_offset w27, -72
1803; CHECK-GI-NEXT:    .cfi_offset w28, -80
1804; CHECK-GI-NEXT:    .cfi_offset w30, -88
1805; CHECK-GI-NEXT:    .cfi_offset w29, -96
1806; CHECK-GI-NEXT:    ushll v4.8h, v0.8b, #0
1807; CHECK-GI-NEXT:    ushll v5.8h, v2.8b, #0
1808; CHECK-GI-NEXT:    ushll v16.8h, v1.8b, #0
1809; CHECK-GI-NEXT:    ushll v17.8h, v3.8b, #0
1810; CHECK-GI-NEXT:    ushll v6.4s, v4.4h, #0
1811; CHECK-GI-NEXT:    ushll v7.4s, v5.4h, #0
1812; CHECK-GI-NEXT:    ushll2 v4.4s, v4.8h, #0
1813; CHECK-GI-NEXT:    ushll2 v5.4s, v5.8h, #0
1814; CHECK-GI-NEXT:    ushll v18.4s, v16.4h, #0
1815; CHECK-GI-NEXT:    ushll v19.4s, v17.4h, #0
1816; CHECK-GI-NEXT:    ushll2 v16.4s, v16.8h, #0
1817; CHECK-GI-NEXT:    ushll2 v17.4s, v17.8h, #0
1818; CHECK-GI-NEXT:    fmov w8, s6
1819; CHECK-GI-NEXT:    fmov w9, s7
1820; CHECK-GI-NEXT:    mov w12, v7.s[3]
1821; CHECK-GI-NEXT:    fmov w13, s5
1822; CHECK-GI-NEXT:    mov w16, v5.s[3]
1823; CHECK-GI-NEXT:    fmov w6, s19
1824; CHECK-GI-NEXT:    mov w7, v19.s[3]
1825; CHECK-GI-NEXT:    fmov w21, s17
1826; CHECK-GI-NEXT:    mov w23, v17.s[3]
1827; CHECK-GI-NEXT:    udiv w11, w8, w9
1828; CHECK-GI-NEXT:    mov w8, v6.s[1]
1829; CHECK-GI-NEXT:    mov w9, v7.s[1]
1830; CHECK-GI-NEXT:    udiv w10, w8, w9
1831; CHECK-GI-NEXT:    mov w8, v6.s[2]
1832; CHECK-GI-NEXT:    mov w9, v7.s[2]
1833; CHECK-GI-NEXT:    mov v20.s[0], w11
1834; CHECK-GI-NEXT:    udiv w9, w8, w9
1835; CHECK-GI-NEXT:    mov w8, v6.s[3]
1836; CHECK-GI-NEXT:    ushll2 v6.8h, v0.16b, #0
1837; CHECK-GI-NEXT:    mov v20.s[1], w10
1838; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
1839; CHECK-GI-NEXT:    ushll v28.4s, v0.4h, #0
1840; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
1841; CHECK-GI-NEXT:    udiv w8, w8, w12
1842; CHECK-GI-NEXT:    fmov w12, s4
1843; CHECK-GI-NEXT:    mov v20.s[2], w9
1844; CHECK-GI-NEXT:    udiv w15, w12, w13
1845; CHECK-GI-NEXT:    mov w12, v4.s[1]
1846; CHECK-GI-NEXT:    mov w13, v5.s[1]
1847; CHECK-GI-NEXT:    mov v20.s[3], w8
1848; CHECK-GI-NEXT:    udiv w14, w12, w13
1849; CHECK-GI-NEXT:    mov w12, v4.s[2]
1850; CHECK-GI-NEXT:    mov w13, v5.s[2]
1851; CHECK-GI-NEXT:    ushll v5.4s, v6.4h, #0
1852; CHECK-GI-NEXT:    mov v21.s[0], w15
1853; CHECK-GI-NEXT:    udiv w13, w12, w13
1854; CHECK-GI-NEXT:    mov w12, v4.s[3]
1855; CHECK-GI-NEXT:    ushll2 v4.8h, v2.16b, #0
1856; CHECK-GI-NEXT:    mov v21.s[1], w14
1857; CHECK-GI-NEXT:    ushll v2.8h, v2.8b, #0
1858; CHECK-GI-NEXT:    ushll v7.4s, v4.4h, #0
1859; CHECK-GI-NEXT:    ushll v30.4s, v2.4h, #0
1860; CHECK-GI-NEXT:    ushll2 v2.4s, v2.8h, #0
1861; CHECK-GI-NEXT:    fmov w17, s7
1862; CHECK-GI-NEXT:    mls v28.4s, v20.4s, v30.4s
1863; CHECK-GI-NEXT:    udiv w12, w12, w16
1864; CHECK-GI-NEXT:    fmov w16, s5
1865; CHECK-GI-NEXT:    mov v21.s[2], w13
1866; CHECK-GI-NEXT:    udiv w1, w16, w17
1867; CHECK-GI-NEXT:    mov w16, v5.s[1]
1868; CHECK-GI-NEXT:    mov w17, v7.s[1]
1869; CHECK-GI-NEXT:    mov v21.s[3], w12
1870; CHECK-GI-NEXT:    mls v0.4s, v21.4s, v2.4s
1871; CHECK-GI-NEXT:    udiv w0, w16, w17
1872; CHECK-GI-NEXT:    mov w16, v5.s[2]
1873; CHECK-GI-NEXT:    mov w17, v7.s[2]
1874; CHECK-GI-NEXT:    mov v22.s[0], w1
1875; CHECK-GI-NEXT:    uzp1 v0.8h, v28.8h, v0.8h
1876; CHECK-GI-NEXT:    udiv w18, w16, w17
1877; CHECK-GI-NEXT:    mov w16, v5.s[3]
1878; CHECK-GI-NEXT:    mov w17, v7.s[3]
1879; CHECK-GI-NEXT:    ushll2 v5.4s, v6.8h, #0
1880; CHECK-GI-NEXT:    ushll2 v7.4s, v4.8h, #0
1881; CHECK-GI-NEXT:    mov v22.s[1], w0
1882; CHECK-GI-NEXT:    ushll v6.4s, v6.4h, #0
1883; CHECK-GI-NEXT:    ushll v4.4s, v4.4h, #0
1884; CHECK-GI-NEXT:    fmov w2, s7
1885; CHECK-GI-NEXT:    mov w4, v7.s[3]
1886; CHECK-GI-NEXT:    udiv w16, w16, w17
1887; CHECK-GI-NEXT:    fmov w17, s5
1888; CHECK-GI-NEXT:    mov v22.s[2], w18
1889; CHECK-GI-NEXT:    udiv w5, w17, w2
1890; CHECK-GI-NEXT:    mov w17, v5.s[1]
1891; CHECK-GI-NEXT:    mov w2, v7.s[1]
1892; CHECK-GI-NEXT:    mov v22.s[3], w16
1893; CHECK-GI-NEXT:    mls v6.4s, v22.4s, v4.4s
1894; CHECK-GI-NEXT:    udiv w3, w17, w2
1895; CHECK-GI-NEXT:    mov w17, v5.s[2]
1896; CHECK-GI-NEXT:    mov w2, v7.s[2]
1897; CHECK-GI-NEXT:    mov v23.s[0], w5
1898; CHECK-GI-NEXT:    udiv w2, w17, w2
1899; CHECK-GI-NEXT:    mov w17, v5.s[3]
1900; CHECK-GI-NEXT:    mov v23.s[1], w3
1901; CHECK-GI-NEXT:    udiv w17, w17, w4
1902; CHECK-GI-NEXT:    fmov w4, s18
1903; CHECK-GI-NEXT:    mov v23.s[2], w2
1904; CHECK-GI-NEXT:    udiv w20, w4, w6
1905; CHECK-GI-NEXT:    mov w4, v18.s[1]
1906; CHECK-GI-NEXT:    mov w6, v19.s[1]
1907; CHECK-GI-NEXT:    mov v23.s[3], w17
1908; CHECK-GI-NEXT:    mls v5.4s, v23.4s, v7.4s
1909; CHECK-GI-NEXT:    udiv w19, w4, w6
1910; CHECK-GI-NEXT:    mov w4, v18.s[2]
1911; CHECK-GI-NEXT:    mov w6, v19.s[2]
1912; CHECK-GI-NEXT:    mov v24.s[0], w20
1913; CHECK-GI-NEXT:    uzp1 v2.8h, v6.8h, v5.8h
1914; CHECK-GI-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
1915; CHECK-GI-NEXT:    udiv w6, w4, w6
1916; CHECK-GI-NEXT:    mov w4, v18.s[3]
1917; CHECK-GI-NEXT:    mov v24.s[1], w19
1918; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
1919; CHECK-GI-NEXT:    udiv w4, w4, w7
1920; CHECK-GI-NEXT:    fmov w7, s16
1921; CHECK-GI-NEXT:    mov v24.s[2], w6
1922; CHECK-GI-NEXT:    udiv w24, w7, w21
1923; CHECK-GI-NEXT:    mov w7, v16.s[1]
1924; CHECK-GI-NEXT:    mov w21, v17.s[1]
1925; CHECK-GI-NEXT:    mov v24.s[3], w4
1926; CHECK-GI-NEXT:    udiv w22, w7, w21
1927; CHECK-GI-NEXT:    mov w7, v16.s[2]
1928; CHECK-GI-NEXT:    mov w21, v17.s[2]
1929; CHECK-GI-NEXT:    ushll2 v17.8h, v1.16b, #0
1930; CHECK-GI-NEXT:    mov v25.s[0], w24
1931; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
1932; CHECK-GI-NEXT:    ushll v18.4s, v17.4h, #0
1933; CHECK-GI-NEXT:    ushll v29.4s, v1.4h, #0
1934; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
1935; CHECK-GI-NEXT:    udiv w21, w7, w21
1936; CHECK-GI-NEXT:    mov w7, v16.s[3]
1937; CHECK-GI-NEXT:    ushll2 v16.8h, v3.16b, #0
1938; CHECK-GI-NEXT:    mov v25.s[1], w22
1939; CHECK-GI-NEXT:    ushll v3.8h, v3.8b, #0
1940; CHECK-GI-NEXT:    ushll v19.4s, v16.4h, #0
1941; CHECK-GI-NEXT:    ushll v31.4s, v3.4h, #0
1942; CHECK-GI-NEXT:    ushll2 v3.4s, v3.8h, #0
1943; CHECK-GI-NEXT:    fmov w25, s19
1944; CHECK-GI-NEXT:    mov w26, v19.s[1]
1945; CHECK-GI-NEXT:    mov w27, v19.s[2]
1946; CHECK-GI-NEXT:    mov w28, v19.s[3]
1947; CHECK-GI-NEXT:    ushll2 v19.4s, v16.8h, #0
1948; CHECK-GI-NEXT:    ushll v16.4s, v16.4h, #0
1949; CHECK-GI-NEXT:    udiv w7, w7, w23
1950; CHECK-GI-NEXT:    fmov w23, s18
1951; CHECK-GI-NEXT:    mov v25.s[2], w21
1952; CHECK-GI-NEXT:    mls v29.4s, v24.4s, v31.4s
1953; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
1954; CHECK-GI-NEXT:    fmov w29, s19
1955; CHECK-GI-NEXT:    mov w30, v19.s[1]
1956; CHECK-GI-NEXT:    mov w15, v19.s[2]
1957; CHECK-GI-NEXT:    udiv w25, w23, w25
1958; CHECK-GI-NEXT:    mov w23, v18.s[1]
1959; CHECK-GI-NEXT:    mov v25.s[3], w7
1960; CHECK-GI-NEXT:    mls v1.4s, v25.4s, v3.4s
1961; CHECK-GI-NEXT:    udiv w26, w23, w26
1962; CHECK-GI-NEXT:    mov w23, v18.s[2]
1963; CHECK-GI-NEXT:    mov v26.s[0], w25
1964; CHECK-GI-NEXT:    uzp1 v1.8h, v29.8h, v1.8h
1965; CHECK-GI-NEXT:    udiv w27, w23, w27
1966; CHECK-GI-NEXT:    mov w23, v18.s[3]
1967; CHECK-GI-NEXT:    ushll2 v18.4s, v17.8h, #0
1968; CHECK-GI-NEXT:    mov v26.s[1], w26
1969; CHECK-GI-NEXT:    ushll v17.4s, v17.4h, #0
1970; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
1971; CHECK-GI-NEXT:    mov w11, v18.s[2]
1972; CHECK-GI-NEXT:    mov w9, v18.s[3]
1973; CHECK-GI-NEXT:    udiv w23, w23, w28
1974; CHECK-GI-NEXT:    fmov w28, s18
1975; CHECK-GI-NEXT:    mov v26.s[2], w27
1976; CHECK-GI-NEXT:    udiv w28, w28, w29
1977; CHECK-GI-NEXT:    mov w29, v18.s[1]
1978; CHECK-GI-NEXT:    mov v26.s[3], w23
1979; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
1980; CHECK-GI-NEXT:    mls v17.4s, v26.4s, v16.4s
1981; CHECK-GI-NEXT:    udiv w29, w29, w30
1982; CHECK-GI-NEXT:    mov v27.s[0], w28
1983; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
1984; CHECK-GI-NEXT:    udiv w10, w11, w15
1985; CHECK-GI-NEXT:    mov w11, v19.s[3]
1986; CHECK-GI-NEXT:    mov v27.s[1], w29
1987; CHECK-GI-NEXT:    udiv w8, w9, w11
1988; CHECK-GI-NEXT:    mov v27.s[2], w10
1989; CHECK-GI-NEXT:    mov v27.s[3], w8
1990; CHECK-GI-NEXT:    mls v18.4s, v27.4s, v19.4s
1991; CHECK-GI-NEXT:    uzp1 v3.8h, v17.8h, v18.8h
1992; CHECK-GI-NEXT:    uzp1 v1.16b, v1.16b, v3.16b
1993; CHECK-GI-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
1994; CHECK-GI-NEXT:    ret
1995entry:
1996  %s = urem <32 x i8> %d, %e
1997  ret <32 x i8> %s
1998}
1999
; srem of two <2 x i16> vectors. Both expectations sign-extend the 16-bit
; values held in 32-bit lanes (shl #16 followed by sshr #16) and divide each
; lane with a scalar sdiv. The SelectionDAG expectation forms each remainder
; with a scalar msub; the GlobalISel expectation gathers the quotients in v2
; and forms both remainders with a single vector mls.
2000define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
2001; CHECK-SD-LABEL: sv2i16:
2002; CHECK-SD:       // %bb.0: // %entry
2003; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #16
2004; CHECK-SD-NEXT:    shl v1.2s, v1.2s, #16
2005; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #16
2006; CHECK-SD-NEXT:    sshr v1.2s, v1.2s, #16
2007; CHECK-SD-NEXT:    fmov w8, s1
2008; CHECK-SD-NEXT:    fmov w9, s0
2009; CHECK-SD-NEXT:    mov w11, v1.s[1]
2010; CHECK-SD-NEXT:    mov w12, v0.s[1]
2011; CHECK-SD-NEXT:    sdiv w10, w9, w8
2012; CHECK-SD-NEXT:    sdiv w13, w12, w11
2013; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2014; CHECK-SD-NEXT:    fmov s0, w8
2015; CHECK-SD-NEXT:    msub w9, w13, w11, w12
2016; CHECK-SD-NEXT:    mov v0.s[1], w9
2017; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2018; CHECK-SD-NEXT:    ret
2019;
2020; CHECK-GI-LABEL: sv2i16:
2021; CHECK-GI:       // %bb.0: // %entry
2022; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #16
2023; CHECK-GI-NEXT:    shl v1.2s, v1.2s, #16
2024; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #16
2025; CHECK-GI-NEXT:    sshr v1.2s, v1.2s, #16
2026; CHECK-GI-NEXT:    fmov w8, s0
2027; CHECK-GI-NEXT:    fmov w9, s1
2028; CHECK-GI-NEXT:    mov w10, v1.s[1]
2029; CHECK-GI-NEXT:    sdiv w8, w8, w9
2030; CHECK-GI-NEXT:    mov w9, v0.s[1]
2031; CHECK-GI-NEXT:    sdiv w9, w9, w10
2032; CHECK-GI-NEXT:    mov v2.s[0], w8
2033; CHECK-GI-NEXT:    mov v2.s[1], w9
2034; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
2035; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2036; CHECK-GI-NEXT:    ret
2037entry:
2038  %s = srem <2 x i16> %d, %e
2039  ret <2 x i16> %s
2040}
2041
; srem of two <3 x i16> vectors. Both expectations have the same shape: the
; three halfword lanes of each operand are extracted sign-extended with smov,
; each lane is divided with a scalar sdiv and turned into a remainder with
; msub, and v0 is rebuilt from the results (fmov for lane 0, mov v0.h[n] for
; lanes 1 and 2). They differ only in register assignment and ordering.
2042define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
2043; CHECK-SD-LABEL: sv3i16:
2044; CHECK-SD:       // %bb.0: // %entry
2045; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
2046; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2047; CHECK-SD-NEXT:    smov w11, v1.h[0]
2048; CHECK-SD-NEXT:    smov w12, v0.h[0]
2049; CHECK-SD-NEXT:    smov w8, v1.h[1]
2050; CHECK-SD-NEXT:    smov w9, v0.h[1]
2051; CHECK-SD-NEXT:    smov w14, v1.h[2]
2052; CHECK-SD-NEXT:    smov w15, v0.h[2]
2053; CHECK-SD-NEXT:    sdiv w13, w12, w11
2054; CHECK-SD-NEXT:    sdiv w10, w9, w8
2055; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2056; CHECK-SD-NEXT:    fmov s0, w11
2057; CHECK-SD-NEXT:    sdiv w16, w15, w14
2058; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2059; CHECK-SD-NEXT:    mov v0.h[1], w8
2060; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2061; CHECK-SD-NEXT:    mov v0.h[2], w8
2062; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2063; CHECK-SD-NEXT:    ret
2064;
2065; CHECK-GI-LABEL: sv3i16:
2066; CHECK-GI:       // %bb.0: // %entry
2067; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
2068; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
2069; CHECK-GI-NEXT:    smov w8, v0.h[0]
2070; CHECK-GI-NEXT:    smov w9, v1.h[0]
2071; CHECK-GI-NEXT:    smov w11, v0.h[1]
2072; CHECK-GI-NEXT:    smov w12, v1.h[1]
2073; CHECK-GI-NEXT:    smov w14, v0.h[2]
2074; CHECK-GI-NEXT:    smov w15, v1.h[2]
2075; CHECK-GI-NEXT:    sdiv w10, w8, w9
2076; CHECK-GI-NEXT:    sdiv w13, w11, w12
2077; CHECK-GI-NEXT:    msub w8, w10, w9, w8
2078; CHECK-GI-NEXT:    fmov s0, w8
2079; CHECK-GI-NEXT:    sdiv w16, w14, w15
2080; CHECK-GI-NEXT:    msub w9, w13, w12, w11
2081; CHECK-GI-NEXT:    mov v0.h[1], w9
2082; CHECK-GI-NEXT:    msub w8, w16, w15, w14
2083; CHECK-GI-NEXT:    mov v0.h[2], w8
2084; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2085; CHECK-GI-NEXT:    ret
2086entry:
2087  %s = srem <3 x i16> %d, %e
2088  ret <3 x i16> %s
2089}
2090
; srem of two <4 x i16> vectors.
; SelectionDAG expectation: each halfword lane is extracted sign-extended with
; smov, divided with a scalar sdiv, turned into a remainder with msub and
; inserted into v0.
; GlobalISel expectation: both operands are widened to 32-bit lanes with
; sshll, each lane is divided with a scalar sdiv, the quotients are gathered
; in v2, one vector mls forms the remainders, and xtn narrows the result back
; to 16-bit lanes.
2091define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
2092; CHECK-SD-LABEL: sv4i16:
2093; CHECK-SD:       // %bb.0: // %entry
2094; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
2095; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2096; CHECK-SD-NEXT:    smov w11, v1.h[0]
2097; CHECK-SD-NEXT:    smov w12, v0.h[0]
2098; CHECK-SD-NEXT:    smov w8, v1.h[1]
2099; CHECK-SD-NEXT:    smov w9, v0.h[1]
2100; CHECK-SD-NEXT:    smov w14, v1.h[2]
2101; CHECK-SD-NEXT:    smov w15, v0.h[2]
2102; CHECK-SD-NEXT:    smov w17, v1.h[3]
2103; CHECK-SD-NEXT:    smov w18, v0.h[3]
2104; CHECK-SD-NEXT:    sdiv w13, w12, w11
2105; CHECK-SD-NEXT:    sdiv w10, w9, w8
2106; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2107; CHECK-SD-NEXT:    fmov s0, w11
2108; CHECK-SD-NEXT:    sdiv w16, w15, w14
2109; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2110; CHECK-SD-NEXT:    mov v0.h[1], w8
2111; CHECK-SD-NEXT:    sdiv w9, w18, w17
2112; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2113; CHECK-SD-NEXT:    mov v0.h[2], w8
2114; CHECK-SD-NEXT:    msub w8, w9, w17, w18
2115; CHECK-SD-NEXT:    mov v0.h[3], w8
2116; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2117; CHECK-SD-NEXT:    ret
2118;
2119; CHECK-GI-LABEL: sv4i16:
2120; CHECK-GI:       // %bb.0: // %entry
2121; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
2122; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
2123; CHECK-GI-NEXT:    fmov w8, s0
2124; CHECK-GI-NEXT:    fmov w9, s1
2125; CHECK-GI-NEXT:    mov w10, v1.s[1]
2126; CHECK-GI-NEXT:    mov w11, v1.s[2]
2127; CHECK-GI-NEXT:    mov w12, v1.s[3]
2128; CHECK-GI-NEXT:    sdiv w8, w8, w9
2129; CHECK-GI-NEXT:    mov w9, v0.s[1]
2130; CHECK-GI-NEXT:    sdiv w9, w9, w10
2131; CHECK-GI-NEXT:    mov w10, v0.s[2]
2132; CHECK-GI-NEXT:    mov v2.s[0], w8
2133; CHECK-GI-NEXT:    sdiv w10, w10, w11
2134; CHECK-GI-NEXT:    mov w11, v0.s[3]
2135; CHECK-GI-NEXT:    mov v2.s[1], w9
2136; CHECK-GI-NEXT:    sdiv w8, w11, w12
2137; CHECK-GI-NEXT:    mov v2.s[2], w10
2138; CHECK-GI-NEXT:    mov v2.s[3], w8
2139; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
2140; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
2141; CHECK-GI-NEXT:    ret
2142entry:
2143  %s = srem <4 x i16> %d, %e
2144  ret <4 x i16> %s
2145}
2146
; srem of two <8 x i16> vectors.
; SelectionDAG expectation: each halfword lane is extracted sign-extended with
; smov, divided with a scalar sdiv and turned into a remainder with msub; the
; remainders are assembled in v2 and copied to v0 at the end.
; GlobalISel expectation: the operands are widened to 32-bit lanes in two
; halves (sshll for the low half, sshll2 for the high half), each lane is
; divided with a scalar sdiv, one vector mls per half forms the remainders,
; and uzp1 narrows and recombines the two halves.
2147define <8 x i16> @sv8i16(<8 x i16> %d, <8 x i16> %e) {
2148; CHECK-SD-LABEL: sv8i16:
2149; CHECK-SD:       // %bb.0: // %entry
2150; CHECK-SD-NEXT:    smov w11, v1.h[0]
2151; CHECK-SD-NEXT:    smov w12, v0.h[0]
2152; CHECK-SD-NEXT:    smov w8, v1.h[1]
2153; CHECK-SD-NEXT:    smov w9, v0.h[1]
2154; CHECK-SD-NEXT:    smov w14, v1.h[2]
2155; CHECK-SD-NEXT:    smov w15, v0.h[2]
2156; CHECK-SD-NEXT:    smov w17, v1.h[3]
2157; CHECK-SD-NEXT:    smov w18, v0.h[3]
2158; CHECK-SD-NEXT:    smov w1, v1.h[4]
2159; CHECK-SD-NEXT:    smov w2, v0.h[4]
2160; CHECK-SD-NEXT:    smov w4, v1.h[5]
2161; CHECK-SD-NEXT:    smov w5, v0.h[5]
2162; CHECK-SD-NEXT:    sdiv w13, w12, w11
2163; CHECK-SD-NEXT:    sdiv w10, w9, w8
2164; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2165; CHECK-SD-NEXT:    smov w13, v1.h[7]
2166; CHECK-SD-NEXT:    fmov s2, w11
2167; CHECK-SD-NEXT:    smov w11, v0.h[6]
2168; CHECK-SD-NEXT:    sdiv w16, w15, w14
2169; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2170; CHECK-SD-NEXT:    smov w10, v1.h[6]
2171; CHECK-SD-NEXT:    mov v2.h[1], w8
2172; CHECK-SD-NEXT:    sdiv w0, w18, w17
2173; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2174; CHECK-SD-NEXT:    smov w14, v0.h[7]
2175; CHECK-SD-NEXT:    mov v2.h[2], w8
2176; CHECK-SD-NEXT:    sdiv w3, w2, w1
2177; CHECK-SD-NEXT:    msub w8, w0, w17, w18
2178; CHECK-SD-NEXT:    mov v2.h[3], w8
2179; CHECK-SD-NEXT:    sdiv w9, w5, w4
2180; CHECK-SD-NEXT:    msub w8, w3, w1, w2
2181; CHECK-SD-NEXT:    mov v2.h[4], w8
2182; CHECK-SD-NEXT:    sdiv w12, w11, w10
2183; CHECK-SD-NEXT:    msub w8, w9, w4, w5
2184; CHECK-SD-NEXT:    mov v2.h[5], w8
2185; CHECK-SD-NEXT:    sdiv w9, w14, w13
2186; CHECK-SD-NEXT:    msub w8, w12, w10, w11
2187; CHECK-SD-NEXT:    mov v2.h[6], w8
2188; CHECK-SD-NEXT:    msub w8, w9, w13, w14
2189; CHECK-SD-NEXT:    mov v2.h[7], w8
2190; CHECK-SD-NEXT:    mov v0.16b, v2.16b
2191; CHECK-SD-NEXT:    ret
2192;
2193; CHECK-GI-LABEL: sv8i16:
2194; CHECK-GI:       // %bb.0: // %entry
2195; CHECK-GI-NEXT:    sshll v2.4s, v0.4h, #0
2196; CHECK-GI-NEXT:    sshll v3.4s, v1.4h, #0
2197; CHECK-GI-NEXT:    sshll2 v0.4s, v0.8h, #0
2198; CHECK-GI-NEXT:    sshll2 v1.4s, v1.8h, #0
2199; CHECK-GI-NEXT:    fmov w8, s2
2200; CHECK-GI-NEXT:    fmov w9, s3
2201; CHECK-GI-NEXT:    mov w10, v3.s[1]
2202; CHECK-GI-NEXT:    mov w11, v3.s[2]
2203; CHECK-GI-NEXT:    mov w12, v3.s[3]
2204; CHECK-GI-NEXT:    fmov w13, s1
2205; CHECK-GI-NEXT:    mov w14, v1.s[1]
2206; CHECK-GI-NEXT:    mov w15, v1.s[2]
2207; CHECK-GI-NEXT:    sdiv w8, w8, w9
2208; CHECK-GI-NEXT:    mov w9, v2.s[1]
2209; CHECK-GI-NEXT:    sdiv w9, w9, w10
2210; CHECK-GI-NEXT:    mov w10, v2.s[2]
2211; CHECK-GI-NEXT:    mov v4.s[0], w8
2212; CHECK-GI-NEXT:    mov w8, v0.s[3]
2213; CHECK-GI-NEXT:    sdiv w10, w10, w11
2214; CHECK-GI-NEXT:    mov w11, v2.s[3]
2215; CHECK-GI-NEXT:    mov v4.s[1], w9
2216; CHECK-GI-NEXT:    sdiv w11, w11, w12
2217; CHECK-GI-NEXT:    fmov w12, s0
2218; CHECK-GI-NEXT:    mov v4.s[2], w10
2219; CHECK-GI-NEXT:    sdiv w12, w12, w13
2220; CHECK-GI-NEXT:    mov w13, v0.s[1]
2221; CHECK-GI-NEXT:    mov v4.s[3], w11
2222; CHECK-GI-NEXT:    mls v2.4s, v4.4s, v3.4s
2223; CHECK-GI-NEXT:    sdiv w13, w13, w14
2224; CHECK-GI-NEXT:    mov w14, v0.s[2]
2225; CHECK-GI-NEXT:    mov v5.s[0], w12
2226; CHECK-GI-NEXT:    mov w12, v1.s[3]
2227; CHECK-GI-NEXT:    sdiv w14, w14, w15
2228; CHECK-GI-NEXT:    mov v5.s[1], w13
2229; CHECK-GI-NEXT:    sdiv w8, w8, w12
2230; CHECK-GI-NEXT:    mov v5.s[2], w14
2231; CHECK-GI-NEXT:    mov v5.s[3], w8
2232; CHECK-GI-NEXT:    mls v0.4s, v5.4s, v1.4s
2233; CHECK-GI-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
2234; CHECK-GI-NEXT:    ret
2235entry:
2236  %s = srem <8 x i16> %d, %e
2237  ret <8 x i16> %s
2238}
2239
; srem on <16 x i16>; the expected code reads the operands from v0/v1 and v2/v3.
; SelectionDAG run (SD prefix): each lane is extracted with smov, divided with a
; scalar sdiv and reduced with msub. The expected output needs a 160-byte frame
; that saves x19-x30 and uses 4-byte folded spill slots for intermediate values.
; GlobalISel run (GI prefix): halves are widened with sshll/sshll2, lanes are
; divided with scalar sdiv, and remainders come from vector mls followed by uzp1.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2240define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) {
2241; CHECK-SD-LABEL: sv16i16:
2242; CHECK-SD:       // %bb.0: // %entry
2243; CHECK-SD-NEXT:    sub sp, sp, #160
2244; CHECK-SD-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
2245; CHECK-SD-NEXT:    stp x28, x27, [sp, #80] // 16-byte Folded Spill
2246; CHECK-SD-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
2247; CHECK-SD-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
2248; CHECK-SD-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
2249; CHECK-SD-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
2250; CHECK-SD-NEXT:    .cfi_def_cfa_offset 160
2251; CHECK-SD-NEXT:    .cfi_offset w19, -8
2252; CHECK-SD-NEXT:    .cfi_offset w20, -16
2253; CHECK-SD-NEXT:    .cfi_offset w21, -24
2254; CHECK-SD-NEXT:    .cfi_offset w22, -32
2255; CHECK-SD-NEXT:    .cfi_offset w23, -40
2256; CHECK-SD-NEXT:    .cfi_offset w24, -48
2257; CHECK-SD-NEXT:    .cfi_offset w25, -56
2258; CHECK-SD-NEXT:    .cfi_offset w26, -64
2259; CHECK-SD-NEXT:    .cfi_offset w27, -72
2260; CHECK-SD-NEXT:    .cfi_offset w28, -80
2261; CHECK-SD-NEXT:    .cfi_offset w30, -88
2262; CHECK-SD-NEXT:    .cfi_offset w29, -96
2263; CHECK-SD-NEXT:    smov w8, v2.h[1]
2264; CHECK-SD-NEXT:    smov w9, v0.h[1]
2265; CHECK-SD-NEXT:    smov w19, v2.h[2]
2266; CHECK-SD-NEXT:    smov w22, v0.h[2]
2267; CHECK-SD-NEXT:    smov w1, v2.h[0]
2268; CHECK-SD-NEXT:    smov w3, v0.h[0]
2269; CHECK-SD-NEXT:    smov w7, v2.h[3]
2270; CHECK-SD-NEXT:    smov w18, v0.h[3]
2271; CHECK-SD-NEXT:    smov w4, v0.h[6]
2272; CHECK-SD-NEXT:    smov w0, v2.h[4]
2273; CHECK-SD-NEXT:    smov w5, v0.h[4]
2274; CHECK-SD-NEXT:    smov w2, v2.h[7]
2275; CHECK-SD-NEXT:    str w8, [sp, #52] // 4-byte Folded Spill
2276; CHECK-SD-NEXT:    smov w6, v0.h[7]
2277; CHECK-SD-NEXT:    smov w27, v3.h[0]
2278; CHECK-SD-NEXT:    str w9, [sp, #44] // 4-byte Folded Spill
2279; CHECK-SD-NEXT:    sdiv w9, w9, w8
2280; CHECK-SD-NEXT:    smov w28, v1.h[0]
2281; CHECK-SD-NEXT:    smov w24, v3.h[1]
2282; CHECK-SD-NEXT:    smov w25, v1.h[1]
2283; CHECK-SD-NEXT:    ldr w21, [sp, #52] // 4-byte Folded Reload
2284; CHECK-SD-NEXT:    ldr w23, [sp, #44] // 4-byte Folded Reload
2285; CHECK-SD-NEXT:    smov w30, v3.h[2]
2286; CHECK-SD-NEXT:    smov w12, v3.h[3]
2287; CHECK-SD-NEXT:    smov w11, v1.h[3]
2288; CHECK-SD-NEXT:    smov w14, v3.h[5]
2289; CHECK-SD-NEXT:    smov w13, v1.h[5]
2290; CHECK-SD-NEXT:    sdiv w8, w22, w19
2291; CHECK-SD-NEXT:    str w9, [sp, #60] // 4-byte Folded Spill
2292; CHECK-SD-NEXT:    ldr w20, [sp, #60] // 4-byte Folded Reload
2293; CHECK-SD-NEXT:    msub w21, w20, w21, w23
2294; CHECK-SD-NEXT:    sdiv w9, w3, w1
2295; CHECK-SD-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
2296; CHECK-SD-NEXT:    sdiv w8, w18, w7
2297; CHECK-SD-NEXT:    stp w9, w8, [sp, #24] // 8-byte Folded Spill
2298; CHECK-SD-NEXT:    smov w8, v2.h[5]
2299; CHECK-SD-NEXT:    smov w9, v0.h[5]
2300; CHECK-SD-NEXT:    sdiv w10, w5, w0
2301; CHECK-SD-NEXT:    ldr w20, [sp, #24] // 4-byte Folded Reload
2302; CHECK-SD-NEXT:    msub w1, w20, w1, w3
2303; CHECK-SD-NEXT:    str w9, [sp, #40] // 4-byte Folded Spill
2304; CHECK-SD-NEXT:    str w8, [sp, #48] // 4-byte Folded Spill
2305; CHECK-SD-NEXT:    fmov s0, w1
2306; CHECK-SD-NEXT:    ldr w1, [sp, #12] // 4-byte Folded Reload
2307; CHECK-SD-NEXT:    msub w1, w1, w19, w22
2308; CHECK-SD-NEXT:    ldr w19, [sp, #28] // 4-byte Folded Reload
2309; CHECK-SD-NEXT:    sdiv w9, w9, w8
2310; CHECK-SD-NEXT:    smov w8, v2.h[6]
2311; CHECK-SD-NEXT:    mov v0.h[1], w21
2312; CHECK-SD-NEXT:    msub w18, w19, w7, w18
2313; CHECK-SD-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
2314; CHECK-SD-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
2315; CHECK-SD-NEXT:    mov v0.h[2], w1
2316; CHECK-SD-NEXT:    str w9, [sp, #56] // 4-byte Folded Spill
2317; CHECK-SD-NEXT:    sdiv w9, w4, w8
2318; CHECK-SD-NEXT:    mov v0.h[3], w18
2319; CHECK-SD-NEXT:    ldr w18, [sp, #40] // 4-byte Folded Reload
2320; CHECK-SD-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
2321; CHECK-SD-NEXT:    sdiv w8, w6, w2
2322; CHECK-SD-NEXT:    smov w9, v1.h[4]
2323; CHECK-SD-NEXT:    sdiv w29, w28, w27
2324; CHECK-SD-NEXT:    stp w8, w10, [sp, #16] // 8-byte Folded Spill
2325; CHECK-SD-NEXT:    smov w8, v1.h[2]
2326; CHECK-SD-NEXT:    smov w10, v3.h[4]
2327; CHECK-SD-NEXT:    sdiv w26, w25, w24
2328; CHECK-SD-NEXT:    msub w3, w29, w27, w28
2329; CHECK-SD-NEXT:    ldp x28, x27, [sp, #80] // 16-byte Folded Reload
2330; CHECK-SD-NEXT:    fmov s2, w3
2331; CHECK-SD-NEXT:    smov w3, v1.h[6]
2332; CHECK-SD-NEXT:    sdiv w15, w8, w30
2333; CHECK-SD-NEXT:    msub w24, w26, w24, w25
2334; CHECK-SD-NEXT:    mov v2.h[1], w24
2335; CHECK-SD-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
2336; CHECK-SD-NEXT:    sdiv w17, w11, w12
2337; CHECK-SD-NEXT:    msub w8, w15, w30, w8
2338; CHECK-SD-NEXT:    smov w15, v3.h[6]
2339; CHECK-SD-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
2340; CHECK-SD-NEXT:    mov v2.h[2], w8
2341; CHECK-SD-NEXT:    sdiv w16, w9, w10
2342; CHECK-SD-NEXT:    msub w8, w17, w12, w11
2343; CHECK-SD-NEXT:    ldr w12, [sp, #20] // 4-byte Folded Reload
2344; CHECK-SD-NEXT:    ldr w17, [sp, #48] // 4-byte Folded Reload
2345; CHECK-SD-NEXT:    msub w12, w12, w0, w5
2346; CHECK-SD-NEXT:    mov v2.h[3], w8
2347; CHECK-SD-NEXT:    mov v0.h[4], w12
2348; CHECK-SD-NEXT:    sdiv w25, w13, w14
2349; CHECK-SD-NEXT:    msub w8, w16, w10, w9
2350; CHECK-SD-NEXT:    smov w9, v3.h[7]
2351; CHECK-SD-NEXT:    smov w10, v1.h[7]
2352; CHECK-SD-NEXT:    ldr w16, [sp, #56] // 4-byte Folded Reload
2353; CHECK-SD-NEXT:    mov v2.h[4], w8
2354; CHECK-SD-NEXT:    msub w16, w16, w17, w18
2355; CHECK-SD-NEXT:    mov v0.h[5], w16
2356; CHECK-SD-NEXT:    sdiv w11, w3, w15
2357; CHECK-SD-NEXT:    msub w8, w25, w14, w13
2358; CHECK-SD-NEXT:    ldp w14, w13, [sp, #32] // 8-byte Folded Reload
2359; CHECK-SD-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
2360; CHECK-SD-NEXT:    mov v2.h[5], w8
2361; CHECK-SD-NEXT:    msub w13, w13, w14, w4
2362; CHECK-SD-NEXT:    mov v0.h[6], w13
2363; CHECK-SD-NEXT:    sdiv w12, w10, w9
2364; CHECK-SD-NEXT:    msub w8, w11, w15, w3
2365; CHECK-SD-NEXT:    ldr w11, [sp, #16] // 4-byte Folded Reload
2366; CHECK-SD-NEXT:    msub w11, w11, w2, w6
2367; CHECK-SD-NEXT:    mov v2.h[6], w8
2368; CHECK-SD-NEXT:    mov v0.h[7], w11
2369; CHECK-SD-NEXT:    msub w8, w12, w9, w10
2370; CHECK-SD-NEXT:    mov v2.h[7], w8
2371; CHECK-SD-NEXT:    mov v1.16b, v2.16b
2372; CHECK-SD-NEXT:    add sp, sp, #160
2373; CHECK-SD-NEXT:    ret
2374;
2375; CHECK-GI-LABEL: sv16i16:
2376; CHECK-GI:       // %bb.0: // %entry
2377; CHECK-GI-NEXT:    sshll v4.4s, v0.4h, #0
2378; CHECK-GI-NEXT:    sshll v5.4s, v2.4h, #0
2379; CHECK-GI-NEXT:    sshll v6.4s, v1.4h, #0
2380; CHECK-GI-NEXT:    sshll v7.4s, v3.4h, #0
2381; CHECK-GI-NEXT:    fmov w8, s4
2382; CHECK-GI-NEXT:    fmov w9, s5
2383; CHECK-GI-NEXT:    mov w12, v5.s[3]
2384; CHECK-GI-NEXT:    fmov w17, s7
2385; CHECK-GI-NEXT:    mov w18, v7.s[1]
2386; CHECK-GI-NEXT:    mov w0, v7.s[2]
2387; CHECK-GI-NEXT:    mov w1, v7.s[3]
2388; CHECK-GI-NEXT:    sshll2 v7.4s, v3.8h, #0
2389; CHECK-GI-NEXT:    sshll v3.4s, v3.4h, #0
2390; CHECK-GI-NEXT:    sdiv w11, w8, w9
2391; CHECK-GI-NEXT:    mov w8, v4.s[1]
2392; CHECK-GI-NEXT:    mov w9, v5.s[1]
2393; CHECK-GI-NEXT:    fmov w2, s7
2394; CHECK-GI-NEXT:    mov w3, v7.s[1]
2395; CHECK-GI-NEXT:    mov w4, v7.s[2]
2396; CHECK-GI-NEXT:    sdiv w10, w8, w9
2397; CHECK-GI-NEXT:    mov w8, v4.s[2]
2398; CHECK-GI-NEXT:    mov w9, v5.s[2]
2399; CHECK-GI-NEXT:    sshll2 v5.4s, v2.8h, #0
2400; CHECK-GI-NEXT:    mov v16.s[0], w11
2401; CHECK-GI-NEXT:    sshll v2.4s, v2.4h, #0
2402; CHECK-GI-NEXT:    fmov w13, s5
2403; CHECK-GI-NEXT:    mov w14, v5.s[1]
2404; CHECK-GI-NEXT:    mov w15, v5.s[2]
2405; CHECK-GI-NEXT:    mov w16, v5.s[3]
2406; CHECK-GI-NEXT:    sdiv w9, w8, w9
2407; CHECK-GI-NEXT:    mov w8, v4.s[3]
2408; CHECK-GI-NEXT:    sshll2 v4.4s, v0.8h, #0
2409; CHECK-GI-NEXT:    mov v16.s[1], w10
2410; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
2411; CHECK-GI-NEXT:    sdiv w8, w8, w12
2412; CHECK-GI-NEXT:    fmov w12, s4
2413; CHECK-GI-NEXT:    mov v16.s[2], w9
2414; CHECK-GI-NEXT:    sdiv w13, w12, w13
2415; CHECK-GI-NEXT:    mov w12, v4.s[1]
2416; CHECK-GI-NEXT:    mov v16.s[3], w8
2417; CHECK-GI-NEXT:    mls v0.4s, v16.4s, v2.4s
2418; CHECK-GI-NEXT:    sdiv w14, w12, w14
2419; CHECK-GI-NEXT:    mov w12, v4.s[2]
2420; CHECK-GI-NEXT:    mov v17.s[0], w13
2421; CHECK-GI-NEXT:    mov w13, v7.s[3]
2422; CHECK-GI-NEXT:    sdiv w15, w12, w15
2423; CHECK-GI-NEXT:    mov w12, v4.s[3]
2424; CHECK-GI-NEXT:    mov v17.s[1], w14
2425; CHECK-GI-NEXT:    sdiv w12, w12, w16
2426; CHECK-GI-NEXT:    fmov w16, s6
2427; CHECK-GI-NEXT:    mov v17.s[2], w15
2428; CHECK-GI-NEXT:    sdiv w16, w16, w17
2429; CHECK-GI-NEXT:    mov w17, v6.s[1]
2430; CHECK-GI-NEXT:    mov v17.s[3], w12
2431; CHECK-GI-NEXT:    mls v4.4s, v17.4s, v5.4s
2432; CHECK-GI-NEXT:    sdiv w17, w17, w18
2433; CHECK-GI-NEXT:    mov w18, v6.s[2]
2434; CHECK-GI-NEXT:    mov v18.s[0], w16
2435; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
2436; CHECK-GI-NEXT:    sdiv w18, w18, w0
2437; CHECK-GI-NEXT:    mov w0, v6.s[3]
2438; CHECK-GI-NEXT:    sshll2 v6.4s, v1.8h, #0
2439; CHECK-GI-NEXT:    mov v18.s[1], w17
2440; CHECK-GI-NEXT:    sshll v1.4s, v1.4h, #0
2441; CHECK-GI-NEXT:    mov w11, v6.s[3]
2442; CHECK-GI-NEXT:    sdiv w0, w0, w1
2443; CHECK-GI-NEXT:    fmov w1, s6
2444; CHECK-GI-NEXT:    mov v18.s[2], w18
2445; CHECK-GI-NEXT:    sdiv w1, w1, w2
2446; CHECK-GI-NEXT:    mov w2, v6.s[1]
2447; CHECK-GI-NEXT:    mov v18.s[3], w0
2448; CHECK-GI-NEXT:    mls v1.4s, v18.4s, v3.4s
2449; CHECK-GI-NEXT:    sdiv w2, w2, w3
2450; CHECK-GI-NEXT:    mov w3, v6.s[2]
2451; CHECK-GI-NEXT:    mov v19.s[0], w1
2452; CHECK-GI-NEXT:    sdiv w3, w3, w4
2453; CHECK-GI-NEXT:    mov v19.s[1], w2
2454; CHECK-GI-NEXT:    sdiv w10, w11, w13
2455; CHECK-GI-NEXT:    mov v19.s[2], w3
2456; CHECK-GI-NEXT:    mov v19.s[3], w10
2457; CHECK-GI-NEXT:    mls v6.4s, v19.4s, v7.4s
2458; CHECK-GI-NEXT:    uzp1 v1.8h, v1.8h, v6.8h
2459; CHECK-GI-NEXT:    ret
2460entry:
2461  %s = srem <16 x i16> %d, %e
2462  ret <16 x i16> %s
2463}
2464
; urem on <2 x i16>. Both runs first mask each 32-bit lane down to its low
; 16 bits (movi + and) and then divide lane by lane with scalar udiv.
; SelectionDAG run (SD prefix): each remainder is formed with a scalar msub and
; inserted into v0. GlobalISel run (GI prefix): the quotients are gathered into
; v2 and the remainders are produced by one vector mls.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2465define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
2466; CHECK-SD-LABEL: uv2i16:
2467; CHECK-SD:       // %bb.0: // %entry
2468; CHECK-SD-NEXT:    movi d2, #0x00ffff0000ffff
2469; CHECK-SD-NEXT:    and v0.8b, v0.8b, v2.8b
2470; CHECK-SD-NEXT:    and v1.8b, v1.8b, v2.8b
2471; CHECK-SD-NEXT:    fmov w8, s1
2472; CHECK-SD-NEXT:    fmov w9, s0
2473; CHECK-SD-NEXT:    mov w11, v1.s[1]
2474; CHECK-SD-NEXT:    mov w12, v0.s[1]
2475; CHECK-SD-NEXT:    udiv w10, w9, w8
2476; CHECK-SD-NEXT:    udiv w13, w12, w11
2477; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2478; CHECK-SD-NEXT:    fmov s0, w8
2479; CHECK-SD-NEXT:    msub w9, w13, w11, w12
2480; CHECK-SD-NEXT:    mov v0.s[1], w9
2481; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2482; CHECK-SD-NEXT:    ret
2483;
2484; CHECK-GI-LABEL: uv2i16:
2485; CHECK-GI:       // %bb.0: // %entry
2486; CHECK-GI-NEXT:    movi d2, #0x00ffff0000ffff
2487; CHECK-GI-NEXT:    and v0.8b, v0.8b, v2.8b
2488; CHECK-GI-NEXT:    and v1.8b, v1.8b, v2.8b
2489; CHECK-GI-NEXT:    fmov w8, s0
2490; CHECK-GI-NEXT:    fmov w9, s1
2491; CHECK-GI-NEXT:    mov w10, v1.s[1]
2492; CHECK-GI-NEXT:    udiv w8, w8, w9
2493; CHECK-GI-NEXT:    mov w9, v0.s[1]
2494; CHECK-GI-NEXT:    udiv w9, w9, w10
2495; CHECK-GI-NEXT:    mov v2.s[0], w8
2496; CHECK-GI-NEXT:    mov v2.s[1], w9
2497; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
2498; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2499; CHECK-GI-NEXT:    ret
2500entry:
2501  %s = urem <2 x i16> %d, %e
2502  ret <2 x i16> %s
2503}
2504
; urem on <3 x i16>. Both runs extract the three 16-bit lanes with umov, divide
; with scalar udiv, form each remainder with msub and insert it into v0.
; NOTE(review): the expected SelectionDAG output (SD prefix) extracts lane 0 of
; both operands twice and applies sxth to every remainder before insertion,
; which the GlobalISel output (GI prefix) does not do. This looks like a missed
; optimisation in the compiler rather than a problem with the test - confirm.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2505define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
2506; CHECK-SD-LABEL: uv3i16:
2507; CHECK-SD:       // %bb.0: // %entry
2508; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
2509; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2510; CHECK-SD-NEXT:    umov w11, v1.h[0]
2511; CHECK-SD-NEXT:    umov w12, v0.h[0]
2512; CHECK-SD-NEXT:    umov w8, v1.h[1]
2513; CHECK-SD-NEXT:    umov w9, v0.h[1]
2514; CHECK-SD-NEXT:    umov w13, v0.h[2]
2515; CHECK-SD-NEXT:    umov w14, v1.h[0]
2516; CHECK-SD-NEXT:    umov w16, v0.h[0]
2517; CHECK-SD-NEXT:    udiv w11, w12, w11
2518; CHECK-SD-NEXT:    umov w12, v1.h[2]
2519; CHECK-SD-NEXT:    udiv w10, w9, w8
2520; CHECK-SD-NEXT:    msub w11, w11, w14, w16
2521; CHECK-SD-NEXT:    udiv w15, w13, w12
2522; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2523; CHECK-SD-NEXT:    sxth w9, w11
2524; CHECK-SD-NEXT:    fmov s0, w9
2525; CHECK-SD-NEXT:    sxth w8, w8
2526; CHECK-SD-NEXT:    mov v0.h[1], w8
2527; CHECK-SD-NEXT:    msub w10, w15, w12, w13
2528; CHECK-SD-NEXT:    sxth w8, w10
2529; CHECK-SD-NEXT:    mov v0.h[2], w8
2530; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2531; CHECK-SD-NEXT:    ret
2532;
2533; CHECK-GI-LABEL: uv3i16:
2534; CHECK-GI:       // %bb.0: // %entry
2535; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
2536; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
2537; CHECK-GI-NEXT:    umov w8, v0.h[0]
2538; CHECK-GI-NEXT:    umov w9, v1.h[0]
2539; CHECK-GI-NEXT:    umov w11, v0.h[1]
2540; CHECK-GI-NEXT:    umov w12, v1.h[1]
2541; CHECK-GI-NEXT:    umov w14, v0.h[2]
2542; CHECK-GI-NEXT:    umov w15, v1.h[2]
2543; CHECK-GI-NEXT:    udiv w10, w8, w9
2544; CHECK-GI-NEXT:    udiv w13, w11, w12
2545; CHECK-GI-NEXT:    msub w8, w10, w9, w8
2546; CHECK-GI-NEXT:    fmov s0, w8
2547; CHECK-GI-NEXT:    udiv w16, w14, w15
2548; CHECK-GI-NEXT:    msub w9, w13, w12, w11
2549; CHECK-GI-NEXT:    mov v0.h[1], w9
2550; CHECK-GI-NEXT:    msub w8, w16, w15, w14
2551; CHECK-GI-NEXT:    mov v0.h[2], w8
2552; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2553; CHECK-GI-NEXT:    ret
2554entry:
2555  %s = urem <3 x i16> %d, %e
2556  ret <3 x i16> %s
2557}
2558
; urem on <4 x i16>.
; SelectionDAG run (SD prefix): each of the four lanes is extracted with umov,
; divided with scalar udiv, reduced with msub and inserted into v0.
; GlobalISel run (GI prefix): both operands are widened to 4 x 32-bit with
; ushll, the lanes are divided with scalar udiv, the remainders come from one
; vector mls, and xtn narrows the result back to 16-bit lanes.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2559define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
2560; CHECK-SD-LABEL: uv4i16:
2561; CHECK-SD:       // %bb.0: // %entry
2562; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
2563; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2564; CHECK-SD-NEXT:    umov w11, v1.h[0]
2565; CHECK-SD-NEXT:    umov w12, v0.h[0]
2566; CHECK-SD-NEXT:    umov w8, v1.h[1]
2567; CHECK-SD-NEXT:    umov w9, v0.h[1]
2568; CHECK-SD-NEXT:    umov w14, v1.h[2]
2569; CHECK-SD-NEXT:    umov w15, v0.h[2]
2570; CHECK-SD-NEXT:    umov w17, v1.h[3]
2571; CHECK-SD-NEXT:    umov w18, v0.h[3]
2572; CHECK-SD-NEXT:    udiv w13, w12, w11
2573; CHECK-SD-NEXT:    udiv w10, w9, w8
2574; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2575; CHECK-SD-NEXT:    fmov s0, w11
2576; CHECK-SD-NEXT:    udiv w16, w15, w14
2577; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2578; CHECK-SD-NEXT:    mov v0.h[1], w8
2579; CHECK-SD-NEXT:    udiv w9, w18, w17
2580; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2581; CHECK-SD-NEXT:    mov v0.h[2], w8
2582; CHECK-SD-NEXT:    msub w8, w9, w17, w18
2583; CHECK-SD-NEXT:    mov v0.h[3], w8
2584; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2585; CHECK-SD-NEXT:    ret
2586;
2587; CHECK-GI-LABEL: uv4i16:
2588; CHECK-GI:       // %bb.0: // %entry
2589; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
2590; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
2591; CHECK-GI-NEXT:    fmov w8, s0
2592; CHECK-GI-NEXT:    fmov w9, s1
2593; CHECK-GI-NEXT:    mov w10, v1.s[1]
2594; CHECK-GI-NEXT:    mov w11, v1.s[2]
2595; CHECK-GI-NEXT:    mov w12, v1.s[3]
2596; CHECK-GI-NEXT:    udiv w8, w8, w9
2597; CHECK-GI-NEXT:    mov w9, v0.s[1]
2598; CHECK-GI-NEXT:    udiv w9, w9, w10
2599; CHECK-GI-NEXT:    mov w10, v0.s[2]
2600; CHECK-GI-NEXT:    mov v2.s[0], w8
2601; CHECK-GI-NEXT:    udiv w10, w10, w11
2602; CHECK-GI-NEXT:    mov w11, v0.s[3]
2603; CHECK-GI-NEXT:    mov v2.s[1], w9
2604; CHECK-GI-NEXT:    udiv w8, w11, w12
2605; CHECK-GI-NEXT:    mov v2.s[2], w10
2606; CHECK-GI-NEXT:    mov v2.s[3], w8
2607; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
2608; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
2609; CHECK-GI-NEXT:    ret
2610entry:
2611  %s = urem <4 x i16> %d, %e
2612  ret <4 x i16> %s
2613}
2614
; urem on <8 x i16>.
; SelectionDAG run (SD prefix): all eight lanes are extracted with umov, divided
; with scalar udiv and reduced with msub; the result is assembled in v2 and then
; copied to v0.
; GlobalISel run (GI prefix): low and high halves are widened with ushll/ushll2,
; lanes are divided with scalar udiv, each half gets its remainders from a vector
; mls, and uzp1 packs the two halves back into 16-bit lanes.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2615define <8 x i16> @uv8i16(<8 x i16> %d, <8 x i16> %e) {
2616; CHECK-SD-LABEL: uv8i16:
2617; CHECK-SD:       // %bb.0: // %entry
2618; CHECK-SD-NEXT:    umov w11, v1.h[0]
2619; CHECK-SD-NEXT:    umov w12, v0.h[0]
2620; CHECK-SD-NEXT:    umov w8, v1.h[1]
2621; CHECK-SD-NEXT:    umov w9, v0.h[1]
2622; CHECK-SD-NEXT:    umov w14, v1.h[2]
2623; CHECK-SD-NEXT:    umov w15, v0.h[2]
2624; CHECK-SD-NEXT:    umov w17, v1.h[3]
2625; CHECK-SD-NEXT:    umov w18, v0.h[3]
2626; CHECK-SD-NEXT:    umov w1, v1.h[4]
2627; CHECK-SD-NEXT:    umov w2, v0.h[4]
2628; CHECK-SD-NEXT:    umov w4, v1.h[5]
2629; CHECK-SD-NEXT:    umov w5, v0.h[5]
2630; CHECK-SD-NEXT:    udiv w13, w12, w11
2631; CHECK-SD-NEXT:    udiv w10, w9, w8
2632; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2633; CHECK-SD-NEXT:    umov w13, v1.h[7]
2634; CHECK-SD-NEXT:    fmov s2, w11
2635; CHECK-SD-NEXT:    umov w11, v0.h[6]
2636; CHECK-SD-NEXT:    udiv w16, w15, w14
2637; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2638; CHECK-SD-NEXT:    umov w10, v1.h[6]
2639; CHECK-SD-NEXT:    mov v2.h[1], w8
2640; CHECK-SD-NEXT:    udiv w0, w18, w17
2641; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2642; CHECK-SD-NEXT:    umov w14, v0.h[7]
2643; CHECK-SD-NEXT:    mov v2.h[2], w8
2644; CHECK-SD-NEXT:    udiv w3, w2, w1
2645; CHECK-SD-NEXT:    msub w8, w0, w17, w18
2646; CHECK-SD-NEXT:    mov v2.h[3], w8
2647; CHECK-SD-NEXT:    udiv w9, w5, w4
2648; CHECK-SD-NEXT:    msub w8, w3, w1, w2
2649; CHECK-SD-NEXT:    mov v2.h[4], w8
2650; CHECK-SD-NEXT:    udiv w12, w11, w10
2651; CHECK-SD-NEXT:    msub w8, w9, w4, w5
2652; CHECK-SD-NEXT:    mov v2.h[5], w8
2653; CHECK-SD-NEXT:    udiv w9, w14, w13
2654; CHECK-SD-NEXT:    msub w8, w12, w10, w11
2655; CHECK-SD-NEXT:    mov v2.h[6], w8
2656; CHECK-SD-NEXT:    msub w8, w9, w13, w14
2657; CHECK-SD-NEXT:    mov v2.h[7], w8
2658; CHECK-SD-NEXT:    mov v0.16b, v2.16b
2659; CHECK-SD-NEXT:    ret
2660;
2661; CHECK-GI-LABEL: uv8i16:
2662; CHECK-GI:       // %bb.0: // %entry
2663; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
2664; CHECK-GI-NEXT:    ushll v3.4s, v1.4h, #0
2665; CHECK-GI-NEXT:    ushll2 v0.4s, v0.8h, #0
2666; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
2667; CHECK-GI-NEXT:    fmov w8, s2
2668; CHECK-GI-NEXT:    fmov w9, s3
2669; CHECK-GI-NEXT:    mov w10, v3.s[1]
2670; CHECK-GI-NEXT:    mov w11, v3.s[2]
2671; CHECK-GI-NEXT:    mov w12, v3.s[3]
2672; CHECK-GI-NEXT:    fmov w13, s1
2673; CHECK-GI-NEXT:    mov w14, v1.s[1]
2674; CHECK-GI-NEXT:    mov w15, v1.s[2]
2675; CHECK-GI-NEXT:    udiv w8, w8, w9
2676; CHECK-GI-NEXT:    mov w9, v2.s[1]
2677; CHECK-GI-NEXT:    udiv w9, w9, w10
2678; CHECK-GI-NEXT:    mov w10, v2.s[2]
2679; CHECK-GI-NEXT:    mov v4.s[0], w8
2680; CHECK-GI-NEXT:    mov w8, v0.s[3]
2681; CHECK-GI-NEXT:    udiv w10, w10, w11
2682; CHECK-GI-NEXT:    mov w11, v2.s[3]
2683; CHECK-GI-NEXT:    mov v4.s[1], w9
2684; CHECK-GI-NEXT:    udiv w11, w11, w12
2685; CHECK-GI-NEXT:    fmov w12, s0
2686; CHECK-GI-NEXT:    mov v4.s[2], w10
2687; CHECK-GI-NEXT:    udiv w12, w12, w13
2688; CHECK-GI-NEXT:    mov w13, v0.s[1]
2689; CHECK-GI-NEXT:    mov v4.s[3], w11
2690; CHECK-GI-NEXT:    mls v2.4s, v4.4s, v3.4s
2691; CHECK-GI-NEXT:    udiv w13, w13, w14
2692; CHECK-GI-NEXT:    mov w14, v0.s[2]
2693; CHECK-GI-NEXT:    mov v5.s[0], w12
2694; CHECK-GI-NEXT:    mov w12, v1.s[3]
2695; CHECK-GI-NEXT:    udiv w14, w14, w15
2696; CHECK-GI-NEXT:    mov v5.s[1], w13
2697; CHECK-GI-NEXT:    udiv w8, w8, w12
2698; CHECK-GI-NEXT:    mov v5.s[2], w14
2699; CHECK-GI-NEXT:    mov v5.s[3], w8
2700; CHECK-GI-NEXT:    mls v0.4s, v5.4s, v1.4s
2701; CHECK-GI-NEXT:    uzp1 v0.8h, v2.8h, v0.8h
2702; CHECK-GI-NEXT:    ret
2703entry:
2704  %s = urem <8 x i16> %d, %e
2705  ret <8 x i16> %s
2706}
2707
; urem on <16 x i16>; the expected code reads the operands from v0/v1 and v2/v3.
; SelectionDAG run (SD prefix): each lane is extracted with umov, divided with a
; scalar udiv and reduced with msub. The expected output needs a 160-byte frame
; that saves x19-x30 and uses 4-byte folded spill slots for intermediate values.
; GlobalISel run (GI prefix): halves are widened with ushll/ushll2, lanes are
; divided with scalar udiv, and remainders come from vector mls followed by uzp1.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2708define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) {
2709; CHECK-SD-LABEL: uv16i16:
2710; CHECK-SD:       // %bb.0: // %entry
2711; CHECK-SD-NEXT:    sub sp, sp, #160
2712; CHECK-SD-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
2713; CHECK-SD-NEXT:    stp x28, x27, [sp, #80] // 16-byte Folded Spill
2714; CHECK-SD-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
2715; CHECK-SD-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
2716; CHECK-SD-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
2717; CHECK-SD-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
2718; CHECK-SD-NEXT:    .cfi_def_cfa_offset 160
2719; CHECK-SD-NEXT:    .cfi_offset w19, -8
2720; CHECK-SD-NEXT:    .cfi_offset w20, -16
2721; CHECK-SD-NEXT:    .cfi_offset w21, -24
2722; CHECK-SD-NEXT:    .cfi_offset w22, -32
2723; CHECK-SD-NEXT:    .cfi_offset w23, -40
2724; CHECK-SD-NEXT:    .cfi_offset w24, -48
2725; CHECK-SD-NEXT:    .cfi_offset w25, -56
2726; CHECK-SD-NEXT:    .cfi_offset w26, -64
2727; CHECK-SD-NEXT:    .cfi_offset w27, -72
2728; CHECK-SD-NEXT:    .cfi_offset w28, -80
2729; CHECK-SD-NEXT:    .cfi_offset w30, -88
2730; CHECK-SD-NEXT:    .cfi_offset w29, -96
2731; CHECK-SD-NEXT:    umov w8, v2.h[1]
2732; CHECK-SD-NEXT:    umov w9, v0.h[1]
2733; CHECK-SD-NEXT:    umov w19, v2.h[2]
2734; CHECK-SD-NEXT:    umov w22, v0.h[2]
2735; CHECK-SD-NEXT:    umov w1, v2.h[0]
2736; CHECK-SD-NEXT:    umov w3, v0.h[0]
2737; CHECK-SD-NEXT:    umov w7, v2.h[3]
2738; CHECK-SD-NEXT:    umov w18, v0.h[3]
2739; CHECK-SD-NEXT:    umov w4, v0.h[6]
2740; CHECK-SD-NEXT:    umov w0, v2.h[4]
2741; CHECK-SD-NEXT:    umov w5, v0.h[4]
2742; CHECK-SD-NEXT:    umov w2, v2.h[7]
2743; CHECK-SD-NEXT:    str w8, [sp, #52] // 4-byte Folded Spill
2744; CHECK-SD-NEXT:    umov w6, v0.h[7]
2745; CHECK-SD-NEXT:    umov w27, v3.h[0]
2746; CHECK-SD-NEXT:    str w9, [sp, #44] // 4-byte Folded Spill
2747; CHECK-SD-NEXT:    udiv w9, w9, w8
2748; CHECK-SD-NEXT:    umov w28, v1.h[0]
2749; CHECK-SD-NEXT:    umov w24, v3.h[1]
2750; CHECK-SD-NEXT:    umov w25, v1.h[1]
2751; CHECK-SD-NEXT:    ldr w21, [sp, #52] // 4-byte Folded Reload
2752; CHECK-SD-NEXT:    ldr w23, [sp, #44] // 4-byte Folded Reload
2753; CHECK-SD-NEXT:    umov w30, v3.h[2]
2754; CHECK-SD-NEXT:    umov w12, v3.h[3]
2755; CHECK-SD-NEXT:    umov w11, v1.h[3]
2756; CHECK-SD-NEXT:    umov w14, v3.h[5]
2757; CHECK-SD-NEXT:    umov w13, v1.h[5]
2758; CHECK-SD-NEXT:    udiv w8, w22, w19
2759; CHECK-SD-NEXT:    str w9, [sp, #60] // 4-byte Folded Spill
2760; CHECK-SD-NEXT:    ldr w20, [sp, #60] // 4-byte Folded Reload
2761; CHECK-SD-NEXT:    msub w21, w20, w21, w23
2762; CHECK-SD-NEXT:    udiv w9, w3, w1
2763; CHECK-SD-NEXT:    str w8, [sp, #12] // 4-byte Folded Spill
2764; CHECK-SD-NEXT:    udiv w8, w18, w7
2765; CHECK-SD-NEXT:    stp w9, w8, [sp, #24] // 8-byte Folded Spill
2766; CHECK-SD-NEXT:    umov w8, v2.h[5]
2767; CHECK-SD-NEXT:    umov w9, v0.h[5]
2768; CHECK-SD-NEXT:    udiv w10, w5, w0
2769; CHECK-SD-NEXT:    ldr w20, [sp, #24] // 4-byte Folded Reload
2770; CHECK-SD-NEXT:    msub w1, w20, w1, w3
2771; CHECK-SD-NEXT:    str w9, [sp, #40] // 4-byte Folded Spill
2772; CHECK-SD-NEXT:    str w8, [sp, #48] // 4-byte Folded Spill
2773; CHECK-SD-NEXT:    fmov s0, w1
2774; CHECK-SD-NEXT:    ldr w1, [sp, #12] // 4-byte Folded Reload
2775; CHECK-SD-NEXT:    msub w1, w1, w19, w22
2776; CHECK-SD-NEXT:    ldr w19, [sp, #28] // 4-byte Folded Reload
2777; CHECK-SD-NEXT:    udiv w9, w9, w8
2778; CHECK-SD-NEXT:    umov w8, v2.h[6]
2779; CHECK-SD-NEXT:    mov v0.h[1], w21
2780; CHECK-SD-NEXT:    msub w18, w19, w7, w18
2781; CHECK-SD-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
2782; CHECK-SD-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
2783; CHECK-SD-NEXT:    mov v0.h[2], w1
2784; CHECK-SD-NEXT:    str w9, [sp, #56] // 4-byte Folded Spill
2785; CHECK-SD-NEXT:    udiv w9, w4, w8
2786; CHECK-SD-NEXT:    mov v0.h[3], w18
2787; CHECK-SD-NEXT:    ldr w18, [sp, #40] // 4-byte Folded Reload
2788; CHECK-SD-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
2789; CHECK-SD-NEXT:    udiv w8, w6, w2
2790; CHECK-SD-NEXT:    umov w9, v1.h[4]
2791; CHECK-SD-NEXT:    udiv w29, w28, w27
2792; CHECK-SD-NEXT:    stp w8, w10, [sp, #16] // 8-byte Folded Spill
2793; CHECK-SD-NEXT:    umov w8, v1.h[2]
2794; CHECK-SD-NEXT:    umov w10, v3.h[4]
2795; CHECK-SD-NEXT:    udiv w26, w25, w24
2796; CHECK-SD-NEXT:    msub w3, w29, w27, w28
2797; CHECK-SD-NEXT:    ldp x28, x27, [sp, #80] // 16-byte Folded Reload
2798; CHECK-SD-NEXT:    fmov s2, w3
2799; CHECK-SD-NEXT:    umov w3, v1.h[6]
2800; CHECK-SD-NEXT:    udiv w15, w8, w30
2801; CHECK-SD-NEXT:    msub w24, w26, w24, w25
2802; CHECK-SD-NEXT:    mov v2.h[1], w24
2803; CHECK-SD-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
2804; CHECK-SD-NEXT:    udiv w17, w11, w12
2805; CHECK-SD-NEXT:    msub w8, w15, w30, w8
2806; CHECK-SD-NEXT:    umov w15, v3.h[6]
2807; CHECK-SD-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
2808; CHECK-SD-NEXT:    mov v2.h[2], w8
2809; CHECK-SD-NEXT:    udiv w16, w9, w10
2810; CHECK-SD-NEXT:    msub w8, w17, w12, w11
2811; CHECK-SD-NEXT:    ldr w12, [sp, #20] // 4-byte Folded Reload
2812; CHECK-SD-NEXT:    ldr w17, [sp, #48] // 4-byte Folded Reload
2813; CHECK-SD-NEXT:    msub w12, w12, w0, w5
2814; CHECK-SD-NEXT:    mov v2.h[3], w8
2815; CHECK-SD-NEXT:    mov v0.h[4], w12
2816; CHECK-SD-NEXT:    udiv w25, w13, w14
2817; CHECK-SD-NEXT:    msub w8, w16, w10, w9
2818; CHECK-SD-NEXT:    umov w9, v3.h[7]
2819; CHECK-SD-NEXT:    umov w10, v1.h[7]
2820; CHECK-SD-NEXT:    ldr w16, [sp, #56] // 4-byte Folded Reload
2821; CHECK-SD-NEXT:    mov v2.h[4], w8
2822; CHECK-SD-NEXT:    msub w16, w16, w17, w18
2823; CHECK-SD-NEXT:    mov v0.h[5], w16
2824; CHECK-SD-NEXT:    udiv w11, w3, w15
2825; CHECK-SD-NEXT:    msub w8, w25, w14, w13
2826; CHECK-SD-NEXT:    ldp w14, w13, [sp, #32] // 8-byte Folded Reload
2827; CHECK-SD-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
2828; CHECK-SD-NEXT:    mov v2.h[5], w8
2829; CHECK-SD-NEXT:    msub w13, w13, w14, w4
2830; CHECK-SD-NEXT:    mov v0.h[6], w13
2831; CHECK-SD-NEXT:    udiv w12, w10, w9
2832; CHECK-SD-NEXT:    msub w8, w11, w15, w3
2833; CHECK-SD-NEXT:    ldr w11, [sp, #16] // 4-byte Folded Reload
2834; CHECK-SD-NEXT:    msub w11, w11, w2, w6
2835; CHECK-SD-NEXT:    mov v2.h[6], w8
2836; CHECK-SD-NEXT:    mov v0.h[7], w11
2837; CHECK-SD-NEXT:    msub w8, w12, w9, w10
2838; CHECK-SD-NEXT:    mov v2.h[7], w8
2839; CHECK-SD-NEXT:    mov v1.16b, v2.16b
2840; CHECK-SD-NEXT:    add sp, sp, #160
2841; CHECK-SD-NEXT:    ret
2842;
2843; CHECK-GI-LABEL: uv16i16:
2844; CHECK-GI:       // %bb.0: // %entry
2845; CHECK-GI-NEXT:    ushll v4.4s, v0.4h, #0
2846; CHECK-GI-NEXT:    ushll v5.4s, v2.4h, #0
2847; CHECK-GI-NEXT:    ushll v6.4s, v1.4h, #0
2848; CHECK-GI-NEXT:    ushll v7.4s, v3.4h, #0
2849; CHECK-GI-NEXT:    fmov w8, s4
2850; CHECK-GI-NEXT:    fmov w9, s5
2851; CHECK-GI-NEXT:    mov w12, v5.s[3]
2852; CHECK-GI-NEXT:    fmov w17, s7
2853; CHECK-GI-NEXT:    mov w18, v7.s[1]
2854; CHECK-GI-NEXT:    mov w0, v7.s[2]
2855; CHECK-GI-NEXT:    mov w1, v7.s[3]
2856; CHECK-GI-NEXT:    ushll2 v7.4s, v3.8h, #0
2857; CHECK-GI-NEXT:    ushll v3.4s, v3.4h, #0
2858; CHECK-GI-NEXT:    udiv w11, w8, w9
2859; CHECK-GI-NEXT:    mov w8, v4.s[1]
2860; CHECK-GI-NEXT:    mov w9, v5.s[1]
2861; CHECK-GI-NEXT:    fmov w2, s7
2862; CHECK-GI-NEXT:    mov w3, v7.s[1]
2863; CHECK-GI-NEXT:    mov w4, v7.s[2]
2864; CHECK-GI-NEXT:    udiv w10, w8, w9
2865; CHECK-GI-NEXT:    mov w8, v4.s[2]
2866; CHECK-GI-NEXT:    mov w9, v5.s[2]
2867; CHECK-GI-NEXT:    ushll2 v5.4s, v2.8h, #0
2868; CHECK-GI-NEXT:    mov v16.s[0], w11
2869; CHECK-GI-NEXT:    ushll v2.4s, v2.4h, #0
2870; CHECK-GI-NEXT:    fmov w13, s5
2871; CHECK-GI-NEXT:    mov w14, v5.s[1]
2872; CHECK-GI-NEXT:    mov w15, v5.s[2]
2873; CHECK-GI-NEXT:    mov w16, v5.s[3]
2874; CHECK-GI-NEXT:    udiv w9, w8, w9
2875; CHECK-GI-NEXT:    mov w8, v4.s[3]
2876; CHECK-GI-NEXT:    ushll2 v4.4s, v0.8h, #0
2877; CHECK-GI-NEXT:    mov v16.s[1], w10
2878; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
2879; CHECK-GI-NEXT:    udiv w8, w8, w12
2880; CHECK-GI-NEXT:    fmov w12, s4
2881; CHECK-GI-NEXT:    mov v16.s[2], w9
2882; CHECK-GI-NEXT:    udiv w13, w12, w13
2883; CHECK-GI-NEXT:    mov w12, v4.s[1]
2884; CHECK-GI-NEXT:    mov v16.s[3], w8
2885; CHECK-GI-NEXT:    mls v0.4s, v16.4s, v2.4s
2886; CHECK-GI-NEXT:    udiv w14, w12, w14
2887; CHECK-GI-NEXT:    mov w12, v4.s[2]
2888; CHECK-GI-NEXT:    mov v17.s[0], w13
2889; CHECK-GI-NEXT:    mov w13, v7.s[3]
2890; CHECK-GI-NEXT:    udiv w15, w12, w15
2891; CHECK-GI-NEXT:    mov w12, v4.s[3]
2892; CHECK-GI-NEXT:    mov v17.s[1], w14
2893; CHECK-GI-NEXT:    udiv w12, w12, w16
2894; CHECK-GI-NEXT:    fmov w16, s6
2895; CHECK-GI-NEXT:    mov v17.s[2], w15
2896; CHECK-GI-NEXT:    udiv w16, w16, w17
2897; CHECK-GI-NEXT:    mov w17, v6.s[1]
2898; CHECK-GI-NEXT:    mov v17.s[3], w12
2899; CHECK-GI-NEXT:    mls v4.4s, v17.4s, v5.4s
2900; CHECK-GI-NEXT:    udiv w17, w17, w18
2901; CHECK-GI-NEXT:    mov w18, v6.s[2]
2902; CHECK-GI-NEXT:    mov v18.s[0], w16
2903; CHECK-GI-NEXT:    uzp1 v0.8h, v0.8h, v4.8h
2904; CHECK-GI-NEXT:    udiv w18, w18, w0
2905; CHECK-GI-NEXT:    mov w0, v6.s[3]
2906; CHECK-GI-NEXT:    ushll2 v6.4s, v1.8h, #0
2907; CHECK-GI-NEXT:    mov v18.s[1], w17
2908; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
2909; CHECK-GI-NEXT:    mov w11, v6.s[3]
2910; CHECK-GI-NEXT:    udiv w0, w0, w1
2911; CHECK-GI-NEXT:    fmov w1, s6
2912; CHECK-GI-NEXT:    mov v18.s[2], w18
2913; CHECK-GI-NEXT:    udiv w1, w1, w2
2914; CHECK-GI-NEXT:    mov w2, v6.s[1]
2915; CHECK-GI-NEXT:    mov v18.s[3], w0
2916; CHECK-GI-NEXT:    mls v1.4s, v18.4s, v3.4s
2917; CHECK-GI-NEXT:    udiv w2, w2, w3
2918; CHECK-GI-NEXT:    mov w3, v6.s[2]
2919; CHECK-GI-NEXT:    mov v19.s[0], w1
2920; CHECK-GI-NEXT:    udiv w3, w3, w4
2921; CHECK-GI-NEXT:    mov v19.s[1], w2
2922; CHECK-GI-NEXT:    udiv w10, w11, w13
2923; CHECK-GI-NEXT:    mov v19.s[2], w3
2924; CHECK-GI-NEXT:    mov v19.s[3], w10
2925; CHECK-GI-NEXT:    mls v6.4s, v19.4s, v7.4s
2926; CHECK-GI-NEXT:    uzp1 v1.8h, v1.8h, v6.8h
2927; CHECK-GI-NEXT:    ret
2928entry:
2929  %s = urem <16 x i16> %d, %e
2930  ret <16 x i16> %s
2931}
2932
; srem on <2 x i32>. Both runs divide the two lanes with scalar sdiv.
; SelectionDAG run (SD prefix): each remainder is formed with a scalar msub and
; inserted into v0. GlobalISel run (GI prefix): the quotients are gathered into
; v2 and the remainders are produced by one vector mls on 2 x 32-bit lanes.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2933define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
2934; CHECK-SD-LABEL: sv2i32:
2935; CHECK-SD:       // %bb.0: // %entry
2936; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
2937; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
2938; CHECK-SD-NEXT:    fmov w8, s1
2939; CHECK-SD-NEXT:    fmov w9, s0
2940; CHECK-SD-NEXT:    mov w11, v1.s[1]
2941; CHECK-SD-NEXT:    mov w12, v0.s[1]
2942; CHECK-SD-NEXT:    sdiv w10, w9, w8
2943; CHECK-SD-NEXT:    sdiv w13, w12, w11
2944; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2945; CHECK-SD-NEXT:    fmov s0, w8
2946; CHECK-SD-NEXT:    msub w9, w13, w11, w12
2947; CHECK-SD-NEXT:    mov v0.s[1], w9
2948; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
2949; CHECK-SD-NEXT:    ret
2950;
2951; CHECK-GI-LABEL: sv2i32:
2952; CHECK-GI:       // %bb.0: // %entry
2953; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
2954; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
2955; CHECK-GI-NEXT:    fmov w8, s0
2956; CHECK-GI-NEXT:    fmov w9, s1
2957; CHECK-GI-NEXT:    mov w10, v1.s[1]
2958; CHECK-GI-NEXT:    sdiv w8, w8, w9
2959; CHECK-GI-NEXT:    mov w9, v0.s[1]
2960; CHECK-GI-NEXT:    sdiv w9, w9, w10
2961; CHECK-GI-NEXT:    mov v2.s[0], w8
2962; CHECK-GI-NEXT:    mov v2.s[1], w9
2963; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
2964; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
2965; CHECK-GI-NEXT:    ret
2966entry:
2967  %s = srem <2 x i32> %d, %e
2968  ret <2 x i32> %s
2969}
2970
; srem on <3 x i32>. Both runs handle the three lanes one at a time: scalar sdiv
; for the quotient, msub for the remainder, then a lane insert into v0.
; SelectionDAG run (SD prefix): lanes are moved straight to w-registers.
; GlobalISel run (GI prefix): lanes 1 and 2 are first copied to scalar s-registers
; (mov s, v.s[n]) and then transferred with fmov; no vector mls is used here.
; The assertions are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2971define <3 x i32> @sv3i32(<3 x i32> %d, <3 x i32> %e) {
2972; CHECK-SD-LABEL: sv3i32:
2973; CHECK-SD:       // %bb.0: // %entry
2974; CHECK-SD-NEXT:    fmov w11, s1
2975; CHECK-SD-NEXT:    fmov w12, s0
2976; CHECK-SD-NEXT:    mov w8, v1.s[1]
2977; CHECK-SD-NEXT:    mov w9, v0.s[1]
2978; CHECK-SD-NEXT:    mov w14, v1.s[2]
2979; CHECK-SD-NEXT:    mov w15, v0.s[2]
2980; CHECK-SD-NEXT:    sdiv w13, w12, w11
2981; CHECK-SD-NEXT:    sdiv w10, w9, w8
2982; CHECK-SD-NEXT:    msub w11, w13, w11, w12
2983; CHECK-SD-NEXT:    fmov s0, w11
2984; CHECK-SD-NEXT:    sdiv w16, w15, w14
2985; CHECK-SD-NEXT:    msub w8, w10, w8, w9
2986; CHECK-SD-NEXT:    mov v0.s[1], w8
2987; CHECK-SD-NEXT:    msub w8, w16, w14, w15
2988; CHECK-SD-NEXT:    mov v0.s[2], w8
2989; CHECK-SD-NEXT:    ret
2990;
2991; CHECK-GI-LABEL: sv3i32:
2992; CHECK-GI:       // %bb.0: // %entry
2993; CHECK-GI-NEXT:    fmov w8, s0
2994; CHECK-GI-NEXT:    fmov w9, s1
2995; CHECK-GI-NEXT:    mov s2, v0.s[1]
2996; CHECK-GI-NEXT:    mov s3, v1.s[1]
2997; CHECK-GI-NEXT:    mov s0, v0.s[2]
2998; CHECK-GI-NEXT:    mov s1, v1.s[2]
2999; CHECK-GI-NEXT:    sdiv w10, w8, w9
3000; CHECK-GI-NEXT:    fmov w11, s2
3001; CHECK-GI-NEXT:    fmov w12, s3
3002; CHECK-GI-NEXT:    fmov w14, s0
3003; CHECK-GI-NEXT:    fmov w15, s1
3004; CHECK-GI-NEXT:    sdiv w13, w11, w12
3005; CHECK-GI-NEXT:    msub w8, w10, w9, w8
3006; CHECK-GI-NEXT:    mov v0.s[0], w8
3007; CHECK-GI-NEXT:    sdiv w9, w14, w15
3008; CHECK-GI-NEXT:    msub w8, w13, w12, w11
3009; CHECK-GI-NEXT:    mov v0.s[1], w8
3010; CHECK-GI-NEXT:    msub w8, w9, w15, w14
3011; CHECK-GI-NEXT:    mov v0.s[2], w8
3012; CHECK-GI-NEXT:    ret
3013entry:
3014  %s = srem <3 x i32> %d, %e
3015  ret <3 x i32> %s
3016}
3017
; sv4i32: signed remainder (srem) of two <4 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): four scalar sdiv + msub pairs, with each
;    remainder inserted into v0 lane by lane.
;  - GI run (llc -global-isel): four scalar sdiv whose quotients are collected in
;    v2, then one vector "mls v0.4s, v2.4s, v1.4s" forms all four remainders.
3018define <4 x i32> @sv4i32(<4 x i32> %d, <4 x i32> %e) {
3019; CHECK-SD-LABEL: sv4i32:
3020; CHECK-SD:       // %bb.0: // %entry
3021; CHECK-SD-NEXT:    fmov w11, s1
3022; CHECK-SD-NEXT:    fmov w12, s0
3023; CHECK-SD-NEXT:    mov w8, v1.s[1]
3024; CHECK-SD-NEXT:    mov w9, v0.s[1]
3025; CHECK-SD-NEXT:    mov w14, v1.s[2]
3026; CHECK-SD-NEXT:    mov w15, v0.s[2]
3027; CHECK-SD-NEXT:    mov w17, v1.s[3]
3028; CHECK-SD-NEXT:    mov w18, v0.s[3]
3029; CHECK-SD-NEXT:    sdiv w13, w12, w11
3030; CHECK-SD-NEXT:    sdiv w10, w9, w8
3031; CHECK-SD-NEXT:    msub w11, w13, w11, w12
3032; CHECK-SD-NEXT:    fmov s0, w11
3033; CHECK-SD-NEXT:    sdiv w16, w15, w14
3034; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3035; CHECK-SD-NEXT:    mov v0.s[1], w8
3036; CHECK-SD-NEXT:    sdiv w9, w18, w17
3037; CHECK-SD-NEXT:    msub w8, w16, w14, w15
3038; CHECK-SD-NEXT:    mov v0.s[2], w8
3039; CHECK-SD-NEXT:    msub w8, w9, w17, w18
3040; CHECK-SD-NEXT:    mov v0.s[3], w8
3041; CHECK-SD-NEXT:    ret
3042;
3043; CHECK-GI-LABEL: sv4i32:
3044; CHECK-GI:       // %bb.0: // %entry
3045; CHECK-GI-NEXT:    fmov w8, s0
3046; CHECK-GI-NEXT:    fmov w9, s1
3047; CHECK-GI-NEXT:    mov w10, v1.s[1]
3048; CHECK-GI-NEXT:    mov w11, v1.s[2]
3049; CHECK-GI-NEXT:    mov w12, v1.s[3]
3050; CHECK-GI-NEXT:    sdiv w8, w8, w9
3051; CHECK-GI-NEXT:    mov w9, v0.s[1]
3052; CHECK-GI-NEXT:    sdiv w9, w9, w10
3053; CHECK-GI-NEXT:    mov w10, v0.s[2]
3054; CHECK-GI-NEXT:    mov v2.s[0], w8
3055; CHECK-GI-NEXT:    sdiv w10, w10, w11
3056; CHECK-GI-NEXT:    mov w11, v0.s[3]
3057; CHECK-GI-NEXT:    mov v2.s[1], w9
3058; CHECK-GI-NEXT:    sdiv w8, w11, w12
3059; CHECK-GI-NEXT:    mov v2.s[2], w10
3060; CHECK-GI-NEXT:    mov v2.s[3], w8
3061; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
3062; CHECK-GI-NEXT:    ret
3063entry:
3064  %s = srem <4 x i32> %d, %e
3065  ret <4 x i32> %s
3066}
3067
; sv8i32: signed remainder (srem) of two <8 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): eight scalar sdiv + msub pairs. The
;    sequence uses w19-w22, so x19-x22 are stored to a 32-byte stack frame on
;    entry and reloaded before ret. Results are inserted into v0 and v1.
;  - GI run (llc -global-isel): four scalar sdiv per 128-bit half, quotients
;    collected in v4 and v5, then one vector mls per half (into v0 and v1). No
;    stack frame appears in these checks.
3068define <8 x i32> @sv8i32(<8 x i32> %d, <8 x i32> %e) {
3069; CHECK-SD-LABEL: sv8i32:
3070; CHECK-SD:       // %bb.0: // %entry
3071; CHECK-SD-NEXT:    stp x22, x21, [sp, #-32]! // 16-byte Folded Spill
3072; CHECK-SD-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
3073; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
3074; CHECK-SD-NEXT:    .cfi_offset w19, -8
3075; CHECK-SD-NEXT:    .cfi_offset w20, -16
3076; CHECK-SD-NEXT:    .cfi_offset w21, -24
3077; CHECK-SD-NEXT:    .cfi_offset w22, -32
3078; CHECK-SD-NEXT:    mov w8, v2.s[1]
3079; CHECK-SD-NEXT:    mov w9, v0.s[1]
3080; CHECK-SD-NEXT:    fmov w11, s2
3081; CHECK-SD-NEXT:    fmov w12, s0
3082; CHECK-SD-NEXT:    fmov w4, s3
3083; CHECK-SD-NEXT:    fmov w5, s1
3084; CHECK-SD-NEXT:    mov w1, v3.s[1]
3085; CHECK-SD-NEXT:    mov w2, v1.s[1]
3086; CHECK-SD-NEXT:    mov w14, v2.s[2]
3087; CHECK-SD-NEXT:    mov w15, v0.s[2]
3088; CHECK-SD-NEXT:    mov w7, v3.s[2]
3089; CHECK-SD-NEXT:    mov w19, v1.s[2]
3090; CHECK-SD-NEXT:    sdiv w10, w9, w8
3091; CHECK-SD-NEXT:    mov w17, v2.s[3]
3092; CHECK-SD-NEXT:    mov w18, v0.s[3]
3093; CHECK-SD-NEXT:    mov w21, v3.s[3]
3094; CHECK-SD-NEXT:    mov w22, v1.s[3]
3095; CHECK-SD-NEXT:    sdiv w13, w12, w11
3096; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3097; CHECK-SD-NEXT:    sdiv w6, w5, w4
3098; CHECK-SD-NEXT:    msub w9, w13, w11, w12
3099; CHECK-SD-NEXT:    fmov s0, w9
3100; CHECK-SD-NEXT:    mov v0.s[1], w8
3101; CHECK-SD-NEXT:    sdiv w3, w2, w1
3102; CHECK-SD-NEXT:    msub w10, w6, w4, w5
3103; CHECK-SD-NEXT:    fmov s1, w10
3104; CHECK-SD-NEXT:    sdiv w16, w15, w14
3105; CHECK-SD-NEXT:    msub w11, w3, w1, w2
3106; CHECK-SD-NEXT:    mov v1.s[1], w11
3107; CHECK-SD-NEXT:    sdiv w20, w19, w7
3108; CHECK-SD-NEXT:    msub w9, w16, w14, w15
3109; CHECK-SD-NEXT:    mov v0.s[2], w9
3110; CHECK-SD-NEXT:    sdiv w0, w18, w17
3111; CHECK-SD-NEXT:    msub w8, w20, w7, w19
3112; CHECK-SD-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
3113; CHECK-SD-NEXT:    mov v1.s[2], w8
3114; CHECK-SD-NEXT:    sdiv w12, w22, w21
3115; CHECK-SD-NEXT:    msub w10, w0, w17, w18
3116; CHECK-SD-NEXT:    mov v0.s[3], w10
3117; CHECK-SD-NEXT:    msub w8, w12, w21, w22
3118; CHECK-SD-NEXT:    mov v1.s[3], w8
3119; CHECK-SD-NEXT:    ldp x22, x21, [sp], #32 // 16-byte Folded Reload
3120; CHECK-SD-NEXT:    ret
3121;
3122; CHECK-GI-LABEL: sv8i32:
3123; CHECK-GI:       // %bb.0: // %entry
3124; CHECK-GI-NEXT:    fmov w8, s0
3125; CHECK-GI-NEXT:    fmov w9, s2
3126; CHECK-GI-NEXT:    mov w10, v2.s[1]
3127; CHECK-GI-NEXT:    mov w11, v2.s[2]
3128; CHECK-GI-NEXT:    mov w12, v2.s[3]
3129; CHECK-GI-NEXT:    fmov w13, s3
3130; CHECK-GI-NEXT:    mov w14, v3.s[1]
3131; CHECK-GI-NEXT:    mov w15, v3.s[2]
3132; CHECK-GI-NEXT:    sdiv w8, w8, w9
3133; CHECK-GI-NEXT:    mov w9, v0.s[1]
3134; CHECK-GI-NEXT:    sdiv w9, w9, w10
3135; CHECK-GI-NEXT:    mov w10, v0.s[2]
3136; CHECK-GI-NEXT:    mov v4.s[0], w8
3137; CHECK-GI-NEXT:    mov w8, v1.s[3]
3138; CHECK-GI-NEXT:    sdiv w10, w10, w11
3139; CHECK-GI-NEXT:    mov w11, v0.s[3]
3140; CHECK-GI-NEXT:    mov v4.s[1], w9
3141; CHECK-GI-NEXT:    sdiv w11, w11, w12
3142; CHECK-GI-NEXT:    fmov w12, s1
3143; CHECK-GI-NEXT:    mov v4.s[2], w10
3144; CHECK-GI-NEXT:    sdiv w12, w12, w13
3145; CHECK-GI-NEXT:    mov w13, v1.s[1]
3146; CHECK-GI-NEXT:    mov v4.s[3], w11
3147; CHECK-GI-NEXT:    mls v0.4s, v4.4s, v2.4s
3148; CHECK-GI-NEXT:    sdiv w13, w13, w14
3149; CHECK-GI-NEXT:    mov w14, v1.s[2]
3150; CHECK-GI-NEXT:    mov v5.s[0], w12
3151; CHECK-GI-NEXT:    mov w12, v3.s[3]
3152; CHECK-GI-NEXT:    sdiv w14, w14, w15
3153; CHECK-GI-NEXT:    mov v5.s[1], w13
3154; CHECK-GI-NEXT:    sdiv w8, w8, w12
3155; CHECK-GI-NEXT:    mov v5.s[2], w14
3156; CHECK-GI-NEXT:    mov v5.s[3], w8
3157; CHECK-GI-NEXT:    mls v1.4s, v5.4s, v3.4s
3158; CHECK-GI-NEXT:    ret
3159entry:
3160  %s = srem <8 x i32> %d, %e
3161  ret <8 x i32> %s
3162}
3163
; uv2i32: unsigned remainder (urem) of two <2 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): two scalar udiv + msub pairs, with the
;    remainders inserted into v0.
;  - GI run (llc -global-isel): two scalar udiv whose quotients are collected in
;    v2, then one vector "mls v0.2s, v2.2s, v1.2s".
; The "// kill:" lines are comments in llc's output and are matched like any
; other output line.
3164define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
3165; CHECK-SD-LABEL: uv2i32:
3166; CHECK-SD:       // %bb.0: // %entry
3167; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
3168; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
3169; CHECK-SD-NEXT:    fmov w8, s1
3170; CHECK-SD-NEXT:    fmov w9, s0
3171; CHECK-SD-NEXT:    mov w11, v1.s[1]
3172; CHECK-SD-NEXT:    mov w12, v0.s[1]
3173; CHECK-SD-NEXT:    udiv w10, w9, w8
3174; CHECK-SD-NEXT:    udiv w13, w12, w11
3175; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3176; CHECK-SD-NEXT:    fmov s0, w8
3177; CHECK-SD-NEXT:    msub w9, w13, w11, w12
3178; CHECK-SD-NEXT:    mov v0.s[1], w9
3179; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
3180; CHECK-SD-NEXT:    ret
3181;
3182; CHECK-GI-LABEL: uv2i32:
3183; CHECK-GI:       // %bb.0: // %entry
3184; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
3185; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
3186; CHECK-GI-NEXT:    fmov w8, s0
3187; CHECK-GI-NEXT:    fmov w9, s1
3188; CHECK-GI-NEXT:    mov w10, v1.s[1]
3189; CHECK-GI-NEXT:    udiv w8, w8, w9
3190; CHECK-GI-NEXT:    mov w9, v0.s[1]
3191; CHECK-GI-NEXT:    udiv w9, w9, w10
3192; CHECK-GI-NEXT:    mov v2.s[0], w8
3193; CHECK-GI-NEXT:    mov v2.s[1], w9
3194; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
3195; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
3196; CHECK-GI-NEXT:    ret
3197entry:
3198  %s = urem <2 x i32> %d, %e
3199  ret <2 x i32> %s
3200}
3201
; uv3i32: unsigned remainder (urem) of two <3 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code, SD run (llc without -global-isel) and GI run (llc -global-isel):
; both move each lane to a w register, compute it with a scalar udiv followed by
; msub, and insert the three remainders into v0.
3202define <3 x i32> @uv3i32(<3 x i32> %d, <3 x i32> %e) {
3203; CHECK-SD-LABEL: uv3i32:
3204; CHECK-SD:       // %bb.0: // %entry
3205; CHECK-SD-NEXT:    fmov w11, s1
3206; CHECK-SD-NEXT:    fmov w12, s0
3207; CHECK-SD-NEXT:    mov w8, v1.s[1]
3208; CHECK-SD-NEXT:    mov w9, v0.s[1]
3209; CHECK-SD-NEXT:    mov w14, v1.s[2]
3210; CHECK-SD-NEXT:    mov w15, v0.s[2]
3211; CHECK-SD-NEXT:    udiv w13, w12, w11
3212; CHECK-SD-NEXT:    udiv w10, w9, w8
3213; CHECK-SD-NEXT:    msub w11, w13, w11, w12
3214; CHECK-SD-NEXT:    fmov s0, w11
3215; CHECK-SD-NEXT:    udiv w16, w15, w14
3216; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3217; CHECK-SD-NEXT:    mov v0.s[1], w8
3218; CHECK-SD-NEXT:    msub w8, w16, w14, w15
3219; CHECK-SD-NEXT:    mov v0.s[2], w8
3220; CHECK-SD-NEXT:    ret
3221;
3222; CHECK-GI-LABEL: uv3i32:
3223; CHECK-GI:       // %bb.0: // %entry
3224; CHECK-GI-NEXT:    fmov w8, s0
3225; CHECK-GI-NEXT:    fmov w9, s1
3226; CHECK-GI-NEXT:    mov s2, v0.s[1]
3227; CHECK-GI-NEXT:    mov s3, v1.s[1]
3228; CHECK-GI-NEXT:    mov s0, v0.s[2]
3229; CHECK-GI-NEXT:    mov s1, v1.s[2]
3230; CHECK-GI-NEXT:    udiv w10, w8, w9
3231; CHECK-GI-NEXT:    fmov w11, s2
3232; CHECK-GI-NEXT:    fmov w12, s3
3233; CHECK-GI-NEXT:    fmov w14, s0
3234; CHECK-GI-NEXT:    fmov w15, s1
3235; CHECK-GI-NEXT:    udiv w13, w11, w12
3236; CHECK-GI-NEXT:    msub w8, w10, w9, w8
3237; CHECK-GI-NEXT:    mov v0.s[0], w8
3238; CHECK-GI-NEXT:    udiv w9, w14, w15
3239; CHECK-GI-NEXT:    msub w8, w13, w12, w11
3240; CHECK-GI-NEXT:    mov v0.s[1], w8
3241; CHECK-GI-NEXT:    msub w8, w9, w15, w14
3242; CHECK-GI-NEXT:    mov v0.s[2], w8
3243; CHECK-GI-NEXT:    ret
3244entry:
3245  %s = urem <3 x i32> %d, %e
3246  ret <3 x i32> %s
3247}
3248
; uv4i32: unsigned remainder (urem) of two <4 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): four scalar udiv + msub pairs, with each
;    remainder inserted into v0 lane by lane.
;  - GI run (llc -global-isel): four scalar udiv whose quotients are collected in
;    v2, then one vector "mls v0.4s, v2.4s, v1.4s" forms all four remainders.
3249define <4 x i32> @uv4i32(<4 x i32> %d, <4 x i32> %e) {
3250; CHECK-SD-LABEL: uv4i32:
3251; CHECK-SD:       // %bb.0: // %entry
3252; CHECK-SD-NEXT:    fmov w11, s1
3253; CHECK-SD-NEXT:    fmov w12, s0
3254; CHECK-SD-NEXT:    mov w8, v1.s[1]
3255; CHECK-SD-NEXT:    mov w9, v0.s[1]
3256; CHECK-SD-NEXT:    mov w14, v1.s[2]
3257; CHECK-SD-NEXT:    mov w15, v0.s[2]
3258; CHECK-SD-NEXT:    mov w17, v1.s[3]
3259; CHECK-SD-NEXT:    mov w18, v0.s[3]
3260; CHECK-SD-NEXT:    udiv w13, w12, w11
3261; CHECK-SD-NEXT:    udiv w10, w9, w8
3262; CHECK-SD-NEXT:    msub w11, w13, w11, w12
3263; CHECK-SD-NEXT:    fmov s0, w11
3264; CHECK-SD-NEXT:    udiv w16, w15, w14
3265; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3266; CHECK-SD-NEXT:    mov v0.s[1], w8
3267; CHECK-SD-NEXT:    udiv w9, w18, w17
3268; CHECK-SD-NEXT:    msub w8, w16, w14, w15
3269; CHECK-SD-NEXT:    mov v0.s[2], w8
3270; CHECK-SD-NEXT:    msub w8, w9, w17, w18
3271; CHECK-SD-NEXT:    mov v0.s[3], w8
3272; CHECK-SD-NEXT:    ret
3273;
3274; CHECK-GI-LABEL: uv4i32:
3275; CHECK-GI:       // %bb.0: // %entry
3276; CHECK-GI-NEXT:    fmov w8, s0
3277; CHECK-GI-NEXT:    fmov w9, s1
3278; CHECK-GI-NEXT:    mov w10, v1.s[1]
3279; CHECK-GI-NEXT:    mov w11, v1.s[2]
3280; CHECK-GI-NEXT:    mov w12, v1.s[3]
3281; CHECK-GI-NEXT:    udiv w8, w8, w9
3282; CHECK-GI-NEXT:    mov w9, v0.s[1]
3283; CHECK-GI-NEXT:    udiv w9, w9, w10
3284; CHECK-GI-NEXT:    mov w10, v0.s[2]
3285; CHECK-GI-NEXT:    mov v2.s[0], w8
3286; CHECK-GI-NEXT:    udiv w10, w10, w11
3287; CHECK-GI-NEXT:    mov w11, v0.s[3]
3288; CHECK-GI-NEXT:    mov v2.s[1], w9
3289; CHECK-GI-NEXT:    udiv w8, w11, w12
3290; CHECK-GI-NEXT:    mov v2.s[2], w10
3291; CHECK-GI-NEXT:    mov v2.s[3], w8
3292; CHECK-GI-NEXT:    mls v0.4s, v2.4s, v1.4s
3293; CHECK-GI-NEXT:    ret
3294entry:
3295  %s = urem <4 x i32> %d, %e
3296  ret <4 x i32> %s
3297}
3298
; uv8i32: unsigned remainder (urem) of two <8 x i32> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): eight scalar udiv + msub pairs. The
;    sequence uses w19-w22, so x19-x22 are stored to a 32-byte stack frame on
;    entry and reloaded before ret. Results are inserted into v0 and v1.
;  - GI run (llc -global-isel): four scalar udiv per 128-bit half, quotients
;    collected in v4 and v5, then one vector mls per half (into v0 and v1). No
;    stack frame appears in these checks.
3299define <8 x i32> @uv8i32(<8 x i32> %d, <8 x i32> %e) {
3300; CHECK-SD-LABEL: uv8i32:
3301; CHECK-SD:       // %bb.0: // %entry
3302; CHECK-SD-NEXT:    stp x22, x21, [sp, #-32]! // 16-byte Folded Spill
3303; CHECK-SD-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
3304; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
3305; CHECK-SD-NEXT:    .cfi_offset w19, -8
3306; CHECK-SD-NEXT:    .cfi_offset w20, -16
3307; CHECK-SD-NEXT:    .cfi_offset w21, -24
3308; CHECK-SD-NEXT:    .cfi_offset w22, -32
3309; CHECK-SD-NEXT:    mov w8, v2.s[1]
3310; CHECK-SD-NEXT:    mov w9, v0.s[1]
3311; CHECK-SD-NEXT:    fmov w11, s2
3312; CHECK-SD-NEXT:    fmov w12, s0
3313; CHECK-SD-NEXT:    fmov w4, s3
3314; CHECK-SD-NEXT:    fmov w5, s1
3315; CHECK-SD-NEXT:    mov w1, v3.s[1]
3316; CHECK-SD-NEXT:    mov w2, v1.s[1]
3317; CHECK-SD-NEXT:    mov w14, v2.s[2]
3318; CHECK-SD-NEXT:    mov w15, v0.s[2]
3319; CHECK-SD-NEXT:    mov w7, v3.s[2]
3320; CHECK-SD-NEXT:    mov w19, v1.s[2]
3321; CHECK-SD-NEXT:    udiv w10, w9, w8
3322; CHECK-SD-NEXT:    mov w17, v2.s[3]
3323; CHECK-SD-NEXT:    mov w18, v0.s[3]
3324; CHECK-SD-NEXT:    mov w21, v3.s[3]
3325; CHECK-SD-NEXT:    mov w22, v1.s[3]
3326; CHECK-SD-NEXT:    udiv w13, w12, w11
3327; CHECK-SD-NEXT:    msub w8, w10, w8, w9
3328; CHECK-SD-NEXT:    udiv w6, w5, w4
3329; CHECK-SD-NEXT:    msub w9, w13, w11, w12
3330; CHECK-SD-NEXT:    fmov s0, w9
3331; CHECK-SD-NEXT:    mov v0.s[1], w8
3332; CHECK-SD-NEXT:    udiv w3, w2, w1
3333; CHECK-SD-NEXT:    msub w10, w6, w4, w5
3334; CHECK-SD-NEXT:    fmov s1, w10
3335; CHECK-SD-NEXT:    udiv w16, w15, w14
3336; CHECK-SD-NEXT:    msub w11, w3, w1, w2
3337; CHECK-SD-NEXT:    mov v1.s[1], w11
3338; CHECK-SD-NEXT:    udiv w20, w19, w7
3339; CHECK-SD-NEXT:    msub w9, w16, w14, w15
3340; CHECK-SD-NEXT:    mov v0.s[2], w9
3341; CHECK-SD-NEXT:    udiv w0, w18, w17
3342; CHECK-SD-NEXT:    msub w8, w20, w7, w19
3343; CHECK-SD-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
3344; CHECK-SD-NEXT:    mov v1.s[2], w8
3345; CHECK-SD-NEXT:    udiv w12, w22, w21
3346; CHECK-SD-NEXT:    msub w10, w0, w17, w18
3347; CHECK-SD-NEXT:    mov v0.s[3], w10
3348; CHECK-SD-NEXT:    msub w8, w12, w21, w22
3349; CHECK-SD-NEXT:    mov v1.s[3], w8
3350; CHECK-SD-NEXT:    ldp x22, x21, [sp], #32 // 16-byte Folded Reload
3351; CHECK-SD-NEXT:    ret
3352;
3353; CHECK-GI-LABEL: uv8i32:
3354; CHECK-GI:       // %bb.0: // %entry
3355; CHECK-GI-NEXT:    fmov w8, s0
3356; CHECK-GI-NEXT:    fmov w9, s2
3357; CHECK-GI-NEXT:    mov w10, v2.s[1]
3358; CHECK-GI-NEXT:    mov w11, v2.s[2]
3359; CHECK-GI-NEXT:    mov w12, v2.s[3]
3360; CHECK-GI-NEXT:    fmov w13, s3
3361; CHECK-GI-NEXT:    mov w14, v3.s[1]
3362; CHECK-GI-NEXT:    mov w15, v3.s[2]
3363; CHECK-GI-NEXT:    udiv w8, w8, w9
3364; CHECK-GI-NEXT:    mov w9, v0.s[1]
3365; CHECK-GI-NEXT:    udiv w9, w9, w10
3366; CHECK-GI-NEXT:    mov w10, v0.s[2]
3367; CHECK-GI-NEXT:    mov v4.s[0], w8
3368; CHECK-GI-NEXT:    mov w8, v1.s[3]
3369; CHECK-GI-NEXT:    udiv w10, w10, w11
3370; CHECK-GI-NEXT:    mov w11, v0.s[3]
3371; CHECK-GI-NEXT:    mov v4.s[1], w9
3372; CHECK-GI-NEXT:    udiv w11, w11, w12
3373; CHECK-GI-NEXT:    fmov w12, s1
3374; CHECK-GI-NEXT:    mov v4.s[2], w10
3375; CHECK-GI-NEXT:    udiv w12, w12, w13
3376; CHECK-GI-NEXT:    mov w13, v1.s[1]
3377; CHECK-GI-NEXT:    mov v4.s[3], w11
3378; CHECK-GI-NEXT:    mls v0.4s, v4.4s, v2.4s
3379; CHECK-GI-NEXT:    udiv w13, w13, w14
3380; CHECK-GI-NEXT:    mov w14, v1.s[2]
3381; CHECK-GI-NEXT:    mov v5.s[0], w12
3382; CHECK-GI-NEXT:    mov w12, v3.s[3]
3383; CHECK-GI-NEXT:    udiv w14, w14, w15
3384; CHECK-GI-NEXT:    mov v5.s[1], w13
3385; CHECK-GI-NEXT:    udiv w8, w8, w12
3386; CHECK-GI-NEXT:    mov v5.s[2], w14
3387; CHECK-GI-NEXT:    mov v5.s[3], w8
3388; CHECK-GI-NEXT:    mls v1.4s, v5.4s, v3.4s
3389; CHECK-GI-NEXT:    ret
3390entry:
3391  %s = urem <8 x i32> %d, %e
3392  ret <8 x i32> %s
3393}
3394
; sv2i64: signed remainder (srem) of two <2 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): two 64-bit scalar sdiv + msub pairs, with
;    the remainders inserted into v0.
;  - GI run (llc -global-isel): two scalar sdiv whose quotients are placed in v1
;    and then read back into x registers, two scalar mul by the divisor lanes,
;    the products placed in v1, and a final vector "sub v0.2d, v0.2d, v1.2d".
3395define <2 x i64> @sv2i64(<2 x i64> %d, <2 x i64> %e) {
3396; CHECK-SD-LABEL: sv2i64:
3397; CHECK-SD:       // %bb.0: // %entry
3398; CHECK-SD-NEXT:    fmov x8, d1
3399; CHECK-SD-NEXT:    fmov x9, d0
3400; CHECK-SD-NEXT:    mov x11, v1.d[1]
3401; CHECK-SD-NEXT:    mov x12, v0.d[1]
3402; CHECK-SD-NEXT:    sdiv x10, x9, x8
3403; CHECK-SD-NEXT:    sdiv x13, x12, x11
3404; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3405; CHECK-SD-NEXT:    fmov d0, x8
3406; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3407; CHECK-SD-NEXT:    mov v0.d[1], x9
3408; CHECK-SD-NEXT:    ret
3409;
3410; CHECK-GI-LABEL: sv2i64:
3411; CHECK-GI:       // %bb.0: // %entry
3412; CHECK-GI-NEXT:    fmov x8, d0
3413; CHECK-GI-NEXT:    fmov x9, d1
3414; CHECK-GI-NEXT:    mov x10, v1.d[1]
3415; CHECK-GI-NEXT:    mov x11, v0.d[1]
3416; CHECK-GI-NEXT:    sdiv x8, x8, x9
3417; CHECK-GI-NEXT:    sdiv x11, x11, x10
3418; CHECK-GI-NEXT:    mov v1.d[0], x8
3419; CHECK-GI-NEXT:    mov v1.d[1], x11
3420; CHECK-GI-NEXT:    fmov x8, d1
3421; CHECK-GI-NEXT:    mov x11, v1.d[1]
3422; CHECK-GI-NEXT:    mul x8, x8, x9
3423; CHECK-GI-NEXT:    mul x9, x11, x10
3424; CHECK-GI-NEXT:    mov v1.d[0], x8
3425; CHECK-GI-NEXT:    mov v1.d[1], x9
3426; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v1.2d
3427; CHECK-GI-NEXT:    ret
3428entry:
3429  %s = srem <2 x i64> %d, %e
3430  ret <2 x i64> %s
3431}
3432
; sv3i64: signed remainder (srem) of two <3 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; In the expected code the six input lanes are read from d0-d5 (dividend lanes
; in d0-d2, divisor lanes in d3-d5) and the result lanes are written to d0-d2.
;  - SD run (llc without -global-isel): three 64-bit scalar sdiv + msub pairs,
;    each remainder moved into its own d register.
;  - GI run (llc -global-isel): lanes 0 and 1 are packed into v0 (dividend) and
;    v3 (divisor) and handled as a pair: scalar sdiv, quotients in v6, scalar
;    mul, products in v2, vector sub; lane 1 of the result is then moved out to
;    d1. Lane 2 is a plain scalar sdiv + msub.
; The "// kill:" lines are comments in llc's output and are matched like any
; other output line.
3433define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
3434; CHECK-SD-LABEL: sv3i64:
3435; CHECK-SD:       // %bb.0: // %entry
3436; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
3437; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
3438; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
3439; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
3440; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
3441; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
3442; CHECK-SD-NEXT:    fmov x8, d3
3443; CHECK-SD-NEXT:    fmov x9, d0
3444; CHECK-SD-NEXT:    fmov x11, d4
3445; CHECK-SD-NEXT:    fmov x12, d1
3446; CHECK-SD-NEXT:    fmov x14, d5
3447; CHECK-SD-NEXT:    fmov x15, d2
3448; CHECK-SD-NEXT:    sdiv x10, x9, x8
3449; CHECK-SD-NEXT:    sdiv x13, x12, x11
3450; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3451; CHECK-SD-NEXT:    fmov d0, x8
3452; CHECK-SD-NEXT:    sdiv x16, x15, x14
3453; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3454; CHECK-SD-NEXT:    fmov d1, x9
3455; CHECK-SD-NEXT:    msub x10, x16, x14, x15
3456; CHECK-SD-NEXT:    fmov d2, x10
3457; CHECK-SD-NEXT:    ret
3458;
3459; CHECK-GI-LABEL: sv3i64:
3460; CHECK-GI:       // %bb.0: // %entry
3461; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
3462; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
3463; CHECK-GI-NEXT:    fmov x8, d0
3464; CHECK-GI-NEXT:    fmov x9, d3
3465; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
3466; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
3467; CHECK-GI-NEXT:    fmov x10, d4
3468; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
3469; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
3470; CHECK-GI-NEXT:    sdiv x8, x8, x9
3471; CHECK-GI-NEXT:    fmov x9, d1
3472; CHECK-GI-NEXT:    fmov x11, d3
3473; CHECK-GI-NEXT:    mov x14, v3.d[1]
3474; CHECK-GI-NEXT:    sdiv x9, x9, x10
3475; CHECK-GI-NEXT:    mov v6.d[0], x8
3476; CHECK-GI-NEXT:    fmov x8, d2
3477; CHECK-GI-NEXT:    mov v6.d[1], x9
3478; CHECK-GI-NEXT:    fmov x9, d5
3479; CHECK-GI-NEXT:    sdiv x12, x8, x9
3480; CHECK-GI-NEXT:    fmov x10, d6
3481; CHECK-GI-NEXT:    mov x13, v6.d[1]
3482; CHECK-GI-NEXT:    mul x10, x10, x11
3483; CHECK-GI-NEXT:    mul x11, x13, x14
3484; CHECK-GI-NEXT:    mov v2.d[0], x10
3485; CHECK-GI-NEXT:    mov v2.d[1], x11
3486; CHECK-GI-NEXT:    msub x8, x12, x9, x8
3487; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
3488; CHECK-GI-NEXT:    mov d1, v0.d[1]
3489; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
3490; CHECK-GI-NEXT:    fmov d2, x8
3491; CHECK-GI-NEXT:    ret
3492entry:
3493  %s = srem <3 x i64> %d, %e
3494  ret <3 x i64> %s
3495}
3496
; sv4i64: signed remainder (srem) of two <4 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): four 64-bit scalar sdiv + msub pairs,
;    with the remainders inserted into v0 and v1.
;  - GI run (llc -global-isel): for each 2-lane half, two scalar sdiv whose
;    quotients go into a vector (v2 / v3) and are read back, two scalar mul by
;    the divisor lanes, the products written to the same vector, and a vector
;    sub from the dividend half (v0 / v1).
3497define <4 x i64> @sv4i64(<4 x i64> %d, <4 x i64> %e) {
3498; CHECK-SD-LABEL: sv4i64:
3499; CHECK-SD:       // %bb.0: // %entry
3500; CHECK-SD-NEXT:    mov x8, v2.d[1]
3501; CHECK-SD-NEXT:    mov x9, v0.d[1]
3502; CHECK-SD-NEXT:    fmov x11, d2
3503; CHECK-SD-NEXT:    fmov x12, d0
3504; CHECK-SD-NEXT:    fmov x14, d3
3505; CHECK-SD-NEXT:    fmov x15, d1
3506; CHECK-SD-NEXT:    mov x17, v3.d[1]
3507; CHECK-SD-NEXT:    mov x18, v1.d[1]
3508; CHECK-SD-NEXT:    sdiv x10, x9, x8
3509; CHECK-SD-NEXT:    sdiv x13, x12, x11
3510; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3511; CHECK-SD-NEXT:    sdiv x16, x15, x14
3512; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3513; CHECK-SD-NEXT:    fmov d0, x9
3514; CHECK-SD-NEXT:    mov v0.d[1], x8
3515; CHECK-SD-NEXT:    sdiv x0, x18, x17
3516; CHECK-SD-NEXT:    msub x10, x16, x14, x15
3517; CHECK-SD-NEXT:    fmov d1, x10
3518; CHECK-SD-NEXT:    msub x11, x0, x17, x18
3519; CHECK-SD-NEXT:    mov v1.d[1], x11
3520; CHECK-SD-NEXT:    ret
3521;
3522; CHECK-GI-LABEL: sv4i64:
3523; CHECK-GI:       // %bb.0: // %entry
3524; CHECK-GI-NEXT:    fmov x8, d0
3525; CHECK-GI-NEXT:    fmov x9, d2
3526; CHECK-GI-NEXT:    mov x10, v2.d[1]
3527; CHECK-GI-NEXT:    mov x11, v0.d[1]
3528; CHECK-GI-NEXT:    fmov x12, d1
3529; CHECK-GI-NEXT:    fmov x13, d3
3530; CHECK-GI-NEXT:    mov x14, v3.d[1]
3531; CHECK-GI-NEXT:    mov x15, v1.d[1]
3532; CHECK-GI-NEXT:    sdiv x8, x8, x9
3533; CHECK-GI-NEXT:    sdiv x12, x12, x13
3534; CHECK-GI-NEXT:    mov v2.d[0], x8
3535; CHECK-GI-NEXT:    sdiv x11, x11, x10
3536; CHECK-GI-NEXT:    mov v3.d[0], x12
3537; CHECK-GI-NEXT:    sdiv x15, x15, x14
3538; CHECK-GI-NEXT:    mov v2.d[1], x11
3539; CHECK-GI-NEXT:    fmov x8, d2
3540; CHECK-GI-NEXT:    mov x11, v2.d[1]
3541; CHECK-GI-NEXT:    mul x8, x8, x9
3542; CHECK-GI-NEXT:    mul x10, x11, x10
3543; CHECK-GI-NEXT:    mov v2.d[0], x8
3544; CHECK-GI-NEXT:    mov v3.d[1], x15
3545; CHECK-GI-NEXT:    mov v2.d[1], x10
3546; CHECK-GI-NEXT:    fmov x9, d3
3547; CHECK-GI-NEXT:    mov x12, v3.d[1]
3548; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
3549; CHECK-GI-NEXT:    mul x9, x9, x13
3550; CHECK-GI-NEXT:    mul x11, x12, x14
3551; CHECK-GI-NEXT:    mov v3.d[0], x9
3552; CHECK-GI-NEXT:    mov v3.d[1], x11
3553; CHECK-GI-NEXT:    sub v1.2d, v1.2d, v3.2d
3554; CHECK-GI-NEXT:    ret
3555entry:
3556  %s = srem <4 x i64> %d, %e
3557  ret <4 x i64> %s
3558}
3559
; uv2i64: unsigned remainder (urem) of two <2 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): two 64-bit scalar udiv + msub pairs, with
;    the remainders inserted into v0.
;  - GI run (llc -global-isel): two scalar udiv whose quotients are placed in v1
;    and then read back into x registers, two scalar mul by the divisor lanes,
;    the products placed in v1, and a final vector "sub v0.2d, v0.2d, v1.2d".
3560define <2 x i64> @uv2i64(<2 x i64> %d, <2 x i64> %e) {
3561; CHECK-SD-LABEL: uv2i64:
3562; CHECK-SD:       // %bb.0: // %entry
3563; CHECK-SD-NEXT:    fmov x8, d1
3564; CHECK-SD-NEXT:    fmov x9, d0
3565; CHECK-SD-NEXT:    mov x11, v1.d[1]
3566; CHECK-SD-NEXT:    mov x12, v0.d[1]
3567; CHECK-SD-NEXT:    udiv x10, x9, x8
3568; CHECK-SD-NEXT:    udiv x13, x12, x11
3569; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3570; CHECK-SD-NEXT:    fmov d0, x8
3571; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3572; CHECK-SD-NEXT:    mov v0.d[1], x9
3573; CHECK-SD-NEXT:    ret
3574;
3575; CHECK-GI-LABEL: uv2i64:
3576; CHECK-GI:       // %bb.0: // %entry
3577; CHECK-GI-NEXT:    fmov x8, d0
3578; CHECK-GI-NEXT:    fmov x9, d1
3579; CHECK-GI-NEXT:    mov x10, v1.d[1]
3580; CHECK-GI-NEXT:    mov x11, v0.d[1]
3581; CHECK-GI-NEXT:    udiv x8, x8, x9
3582; CHECK-GI-NEXT:    udiv x11, x11, x10
3583; CHECK-GI-NEXT:    mov v1.d[0], x8
3584; CHECK-GI-NEXT:    mov v1.d[1], x11
3585; CHECK-GI-NEXT:    fmov x8, d1
3586; CHECK-GI-NEXT:    mov x11, v1.d[1]
3587; CHECK-GI-NEXT:    mul x8, x8, x9
3588; CHECK-GI-NEXT:    mul x9, x11, x10
3589; CHECK-GI-NEXT:    mov v1.d[0], x8
3590; CHECK-GI-NEXT:    mov v1.d[1], x9
3591; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v1.2d
3592; CHECK-GI-NEXT:    ret
3593entry:
3594  %s = urem <2 x i64> %d, %e
3595  ret <2 x i64> %s
3596}
3597
; uv3i64: unsigned remainder (urem) of two <3 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; In the expected code the six input lanes are read from d0-d5 (dividend lanes
; in d0-d2, divisor lanes in d3-d5) and the result lanes are written to d0-d2.
;  - SD run (llc without -global-isel): three 64-bit scalar udiv + msub pairs,
;    each remainder moved into its own d register.
;  - GI run (llc -global-isel): lanes 0 and 1 are packed into v0 (dividend) and
;    v3 (divisor) and handled as a pair: scalar udiv, quotients in v6, scalar
;    mul, products in v2, vector sub; lane 1 of the result is then moved out to
;    d1. Lane 2 is a plain scalar udiv + msub.
; The "// kill:" lines are comments in llc's output and are matched like any
; other output line.
3598define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
3599; CHECK-SD-LABEL: uv3i64:
3600; CHECK-SD:       // %bb.0: // %entry
3601; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
3602; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
3603; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
3604; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
3605; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
3606; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
3607; CHECK-SD-NEXT:    fmov x8, d3
3608; CHECK-SD-NEXT:    fmov x9, d0
3609; CHECK-SD-NEXT:    fmov x11, d4
3610; CHECK-SD-NEXT:    fmov x12, d1
3611; CHECK-SD-NEXT:    fmov x14, d5
3612; CHECK-SD-NEXT:    fmov x15, d2
3613; CHECK-SD-NEXT:    udiv x10, x9, x8
3614; CHECK-SD-NEXT:    udiv x13, x12, x11
3615; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3616; CHECK-SD-NEXT:    fmov d0, x8
3617; CHECK-SD-NEXT:    udiv x16, x15, x14
3618; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3619; CHECK-SD-NEXT:    fmov d1, x9
3620; CHECK-SD-NEXT:    msub x10, x16, x14, x15
3621; CHECK-SD-NEXT:    fmov d2, x10
3622; CHECK-SD-NEXT:    ret
3623;
3624; CHECK-GI-LABEL: uv3i64:
3625; CHECK-GI:       // %bb.0: // %entry
3626; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
3627; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
3628; CHECK-GI-NEXT:    fmov x8, d0
3629; CHECK-GI-NEXT:    fmov x9, d3
3630; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
3631; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
3632; CHECK-GI-NEXT:    fmov x10, d4
3633; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
3634; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
3635; CHECK-GI-NEXT:    udiv x8, x8, x9
3636; CHECK-GI-NEXT:    fmov x9, d1
3637; CHECK-GI-NEXT:    fmov x11, d3
3638; CHECK-GI-NEXT:    mov x14, v3.d[1]
3639; CHECK-GI-NEXT:    udiv x9, x9, x10
3640; CHECK-GI-NEXT:    mov v6.d[0], x8
3641; CHECK-GI-NEXT:    fmov x8, d2
3642; CHECK-GI-NEXT:    mov v6.d[1], x9
3643; CHECK-GI-NEXT:    fmov x9, d5
3644; CHECK-GI-NEXT:    udiv x12, x8, x9
3645; CHECK-GI-NEXT:    fmov x10, d6
3646; CHECK-GI-NEXT:    mov x13, v6.d[1]
3647; CHECK-GI-NEXT:    mul x10, x10, x11
3648; CHECK-GI-NEXT:    mul x11, x13, x14
3649; CHECK-GI-NEXT:    mov v2.d[0], x10
3650; CHECK-GI-NEXT:    mov v2.d[1], x11
3651; CHECK-GI-NEXT:    msub x8, x12, x9, x8
3652; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
3653; CHECK-GI-NEXT:    mov d1, v0.d[1]
3654; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
3655; CHECK-GI-NEXT:    fmov d2, x8
3656; CHECK-GI-NEXT:    ret
3657entry:
3658  %s = urem <3 x i64> %d, %e
3659  ret <3 x i64> %s
3660}
3661
; uv4i64: unsigned remainder (urem) of two <4 x i64> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code:
;  - SD run (llc without -global-isel): four 64-bit scalar udiv + msub pairs,
;    with the remainders inserted into v0 and v1.
;  - GI run (llc -global-isel): for each 2-lane half, two scalar udiv whose
;    quotients go into a vector (v2 / v3) and are read back, two scalar mul by
;    the divisor lanes, the products written to the same vector, and a vector
;    sub from the dividend half (v0 / v1).
3662define <4 x i64> @uv4i64(<4 x i64> %d, <4 x i64> %e) {
3663; CHECK-SD-LABEL: uv4i64:
3664; CHECK-SD:       // %bb.0: // %entry
3665; CHECK-SD-NEXT:    mov x8, v2.d[1]
3666; CHECK-SD-NEXT:    mov x9, v0.d[1]
3667; CHECK-SD-NEXT:    fmov x11, d2
3668; CHECK-SD-NEXT:    fmov x12, d0
3669; CHECK-SD-NEXT:    fmov x14, d3
3670; CHECK-SD-NEXT:    fmov x15, d1
3671; CHECK-SD-NEXT:    mov x17, v3.d[1]
3672; CHECK-SD-NEXT:    mov x18, v1.d[1]
3673; CHECK-SD-NEXT:    udiv x10, x9, x8
3674; CHECK-SD-NEXT:    udiv x13, x12, x11
3675; CHECK-SD-NEXT:    msub x8, x10, x8, x9
3676; CHECK-SD-NEXT:    udiv x16, x15, x14
3677; CHECK-SD-NEXT:    msub x9, x13, x11, x12
3678; CHECK-SD-NEXT:    fmov d0, x9
3679; CHECK-SD-NEXT:    mov v0.d[1], x8
3680; CHECK-SD-NEXT:    udiv x0, x18, x17
3681; CHECK-SD-NEXT:    msub x10, x16, x14, x15
3682; CHECK-SD-NEXT:    fmov d1, x10
3683; CHECK-SD-NEXT:    msub x11, x0, x17, x18
3684; CHECK-SD-NEXT:    mov v1.d[1], x11
3685; CHECK-SD-NEXT:    ret
3686;
3687; CHECK-GI-LABEL: uv4i64:
3688; CHECK-GI:       // %bb.0: // %entry
3689; CHECK-GI-NEXT:    fmov x8, d0
3690; CHECK-GI-NEXT:    fmov x9, d2
3691; CHECK-GI-NEXT:    mov x10, v2.d[1]
3692; CHECK-GI-NEXT:    mov x11, v0.d[1]
3693; CHECK-GI-NEXT:    fmov x12, d1
3694; CHECK-GI-NEXT:    fmov x13, d3
3695; CHECK-GI-NEXT:    mov x14, v3.d[1]
3696; CHECK-GI-NEXT:    mov x15, v1.d[1]
3697; CHECK-GI-NEXT:    udiv x8, x8, x9
3698; CHECK-GI-NEXT:    udiv x12, x12, x13
3699; CHECK-GI-NEXT:    mov v2.d[0], x8
3700; CHECK-GI-NEXT:    udiv x11, x11, x10
3701; CHECK-GI-NEXT:    mov v3.d[0], x12
3702; CHECK-GI-NEXT:    udiv x15, x15, x14
3703; CHECK-GI-NEXT:    mov v2.d[1], x11
3704; CHECK-GI-NEXT:    fmov x8, d2
3705; CHECK-GI-NEXT:    mov x11, v2.d[1]
3706; CHECK-GI-NEXT:    mul x8, x8, x9
3707; CHECK-GI-NEXT:    mul x10, x11, x10
3708; CHECK-GI-NEXT:    mov v2.d[0], x8
3709; CHECK-GI-NEXT:    mov v3.d[1], x15
3710; CHECK-GI-NEXT:    mov v2.d[1], x10
3711; CHECK-GI-NEXT:    fmov x9, d3
3712; CHECK-GI-NEXT:    mov x12, v3.d[1]
3713; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
3714; CHECK-GI-NEXT:    mul x9, x9, x13
3715; CHECK-GI-NEXT:    mul x11, x12, x14
3716; CHECK-GI-NEXT:    mov v3.d[0], x9
3717; CHECK-GI-NEXT:    mov v3.d[1], x11
3718; CHECK-GI-NEXT:    sub v1.2d, v1.2d, v3.2d
3719; CHECK-GI-NEXT:    ret
3720entry:
3721  %s = urem <4 x i64> %d, %e
3722  ret <4 x i64> %s
3723}
3724
; sv2i128: signed remainder (srem) of two <2 x i128> arguments, returned directly.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing.
; Expected code, SD run (llc without -global-isel) and GI run (llc -global-isel):
; both call __modti3 twice, once per element.
;  - x19-x24 and x30 are stored to a 64-byte stack frame on entry and reloaded
;    before ret.
;  - First call: x0/x1 are left as received and x4/x5 are moved into x2/x3.
;  - Second call: the incoming x2/x3 and x6/x7, saved in callee-saved registers
;    before the first call, are moved into x0/x1 and x2/x3.
;  - Return: the first call's result (parked in x23/x24) goes back to x0/x1 and
;    the second call's result is moved to x2/x3.
; The two runs differ only in which of x19-x22 holds which saved argument.
3725define <2 x i128> @sv2i128(<2 x i128> %d, <2 x i128> %e) {
3726; CHECK-SD-LABEL: sv2i128:
3727; CHECK-SD:       // %bb.0: // %entry
3728; CHECK-SD-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
3729; CHECK-SD-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
3730; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
3731; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
3732; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
3733; CHECK-SD-NEXT:    .cfi_offset w19, -8
3734; CHECK-SD-NEXT:    .cfi_offset w20, -16
3735; CHECK-SD-NEXT:    .cfi_offset w21, -24
3736; CHECK-SD-NEXT:    .cfi_offset w22, -32
3737; CHECK-SD-NEXT:    .cfi_offset w23, -40
3738; CHECK-SD-NEXT:    .cfi_offset w24, -48
3739; CHECK-SD-NEXT:    .cfi_offset w30, -64
3740; CHECK-SD-NEXT:    mov x21, x3
3741; CHECK-SD-NEXT:    mov x22, x2
3742; CHECK-SD-NEXT:    mov x2, x4
3743; CHECK-SD-NEXT:    mov x3, x5
3744; CHECK-SD-NEXT:    mov x19, x7
3745; CHECK-SD-NEXT:    mov x20, x6
3746; CHECK-SD-NEXT:    bl __modti3
3747; CHECK-SD-NEXT:    mov x23, x0
3748; CHECK-SD-NEXT:    mov x24, x1
3749; CHECK-SD-NEXT:    mov x0, x22
3750; CHECK-SD-NEXT:    mov x1, x21
3751; CHECK-SD-NEXT:    mov x2, x20
3752; CHECK-SD-NEXT:    mov x3, x19
3753; CHECK-SD-NEXT:    bl __modti3
3754; CHECK-SD-NEXT:    mov x2, x0
3755; CHECK-SD-NEXT:    mov x3, x1
3756; CHECK-SD-NEXT:    mov x0, x23
3757; CHECK-SD-NEXT:    mov x1, x24
3758; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
3759; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
3760; CHECK-SD-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
3761; CHECK-SD-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
3762; CHECK-SD-NEXT:    ret
3763;
3764; CHECK-GI-LABEL: sv2i128:
3765; CHECK-GI:       // %bb.0: // %entry
3766; CHECK-GI-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
3767; CHECK-GI-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
3768; CHECK-GI-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
3769; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
3770; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
3771; CHECK-GI-NEXT:    .cfi_offset w19, -8
3772; CHECK-GI-NEXT:    .cfi_offset w20, -16
3773; CHECK-GI-NEXT:    .cfi_offset w21, -24
3774; CHECK-GI-NEXT:    .cfi_offset w22, -32
3775; CHECK-GI-NEXT:    .cfi_offset w23, -40
3776; CHECK-GI-NEXT:    .cfi_offset w24, -48
3777; CHECK-GI-NEXT:    .cfi_offset w30, -64
3778; CHECK-GI-NEXT:    mov x19, x2
3779; CHECK-GI-NEXT:    mov x20, x3
3780; CHECK-GI-NEXT:    mov x2, x4
3781; CHECK-GI-NEXT:    mov x3, x5
3782; CHECK-GI-NEXT:    mov x21, x6
3783; CHECK-GI-NEXT:    mov x22, x7
3784; CHECK-GI-NEXT:    bl __modti3
3785; CHECK-GI-NEXT:    mov x23, x0
3786; CHECK-GI-NEXT:    mov x24, x1
3787; CHECK-GI-NEXT:    mov x0, x19
3788; CHECK-GI-NEXT:    mov x1, x20
3789; CHECK-GI-NEXT:    mov x2, x21
3790; CHECK-GI-NEXT:    mov x3, x22
3791; CHECK-GI-NEXT:    bl __modti3
3792; CHECK-GI-NEXT:    mov x2, x0
3793; CHECK-GI-NEXT:    mov x3, x1
3794; CHECK-GI-NEXT:    mov x0, x23
3795; CHECK-GI-NEXT:    mov x1, x24
3796; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
3797; CHECK-GI-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
3798; CHECK-GI-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
3799; CHECK-GI-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
3800; CHECK-GI-NEXT:    ret
3801entry:
3802  %s = srem <2 x i128> %d, %e
3803  ret <2 x i128> %s
3804}
3805
; sv3i128 - srem of two <3 x i128> vectors.
; In the expected output of both RUN lines (default llc, checked under the
; CHECK-SD prefix, and -global-isel, checked under CHECK-GI) the operation
; becomes three calls to __modti3. x6/x7 are forwarded as x2/x3 of the first
; call, while the x2/x3 operands of the other two calls are loaded from
; [sp, #96] and [sp, #112] once the 96-byte frame has been set up.
; Values that must survive a call are held in registers from x19-x28, all of
; which are saved and restored by the frame, and the three results are moved
; into x0-x5 before the return.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file), so regenerate them instead of editing them by hand.
3806define <3 x i128> @sv3i128(<3 x i128> %d, <3 x i128> %e) {
3807; CHECK-SD-LABEL: sv3i128:
3808; CHECK-SD:       // %bb.0: // %entry
3809; CHECK-SD-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
3810; CHECK-SD-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
3811; CHECK-SD-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
3812; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
3813; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
3814; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
3815; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
3816; CHECK-SD-NEXT:    .cfi_offset w19, -8
3817; CHECK-SD-NEXT:    .cfi_offset w20, -16
3818; CHECK-SD-NEXT:    .cfi_offset w21, -24
3819; CHECK-SD-NEXT:    .cfi_offset w22, -32
3820; CHECK-SD-NEXT:    .cfi_offset w23, -40
3821; CHECK-SD-NEXT:    .cfi_offset w24, -48
3822; CHECK-SD-NEXT:    .cfi_offset w25, -56
3823; CHECK-SD-NEXT:    .cfi_offset w26, -64
3824; CHECK-SD-NEXT:    .cfi_offset w27, -72
3825; CHECK-SD-NEXT:    .cfi_offset w28, -80
3826; CHECK-SD-NEXT:    .cfi_offset w30, -96
3827; CHECK-SD-NEXT:    ldp x23, x24, [sp, #112]
3828; CHECK-SD-NEXT:    mov x21, x3
3829; CHECK-SD-NEXT:    ldp x25, x26, [sp, #96]
3830; CHECK-SD-NEXT:    mov x22, x2
3831; CHECK-SD-NEXT:    mov x2, x6
3832; CHECK-SD-NEXT:    mov x3, x7
3833; CHECK-SD-NEXT:    mov x19, x5
3834; CHECK-SD-NEXT:    mov x20, x4
3835; CHECK-SD-NEXT:    bl __modti3
3836; CHECK-SD-NEXT:    mov x27, x0
3837; CHECK-SD-NEXT:    mov x28, x1
3838; CHECK-SD-NEXT:    mov x0, x22
3839; CHECK-SD-NEXT:    mov x1, x21
3840; CHECK-SD-NEXT:    mov x2, x25
3841; CHECK-SD-NEXT:    mov x3, x26
3842; CHECK-SD-NEXT:    bl __modti3
3843; CHECK-SD-NEXT:    mov x21, x0
3844; CHECK-SD-NEXT:    mov x22, x1
3845; CHECK-SD-NEXT:    mov x0, x20
3846; CHECK-SD-NEXT:    mov x1, x19
3847; CHECK-SD-NEXT:    mov x2, x23
3848; CHECK-SD-NEXT:    mov x3, x24
3849; CHECK-SD-NEXT:    bl __modti3
3850; CHECK-SD-NEXT:    mov x4, x0
3851; CHECK-SD-NEXT:    mov x5, x1
3852; CHECK-SD-NEXT:    mov x0, x27
3853; CHECK-SD-NEXT:    mov x1, x28
3854; CHECK-SD-NEXT:    mov x2, x21
3855; CHECK-SD-NEXT:    mov x3, x22
3856; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
3857; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
3858; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
3859; CHECK-SD-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
3860; CHECK-SD-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
3861; CHECK-SD-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
3862; CHECK-SD-NEXT:    ret
3863;
3864; CHECK-GI-LABEL: sv3i128:
3865; CHECK-GI:       // %bb.0: // %entry
3866; CHECK-GI-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
3867; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
3868; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
3869; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
3870; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
3871; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
3872; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
3873; CHECK-GI-NEXT:    .cfi_offset w19, -8
3874; CHECK-GI-NEXT:    .cfi_offset w20, -16
3875; CHECK-GI-NEXT:    .cfi_offset w21, -24
3876; CHECK-GI-NEXT:    .cfi_offset w22, -32
3877; CHECK-GI-NEXT:    .cfi_offset w23, -40
3878; CHECK-GI-NEXT:    .cfi_offset w24, -48
3879; CHECK-GI-NEXT:    .cfi_offset w25, -56
3880; CHECK-GI-NEXT:    .cfi_offset w26, -64
3881; CHECK-GI-NEXT:    .cfi_offset w27, -72
3882; CHECK-GI-NEXT:    .cfi_offset w28, -80
3883; CHECK-GI-NEXT:    .cfi_offset w30, -96
3884; CHECK-GI-NEXT:    ldp x23, x24, [sp, #96]
3885; CHECK-GI-NEXT:    mov x19, x2
3886; CHECK-GI-NEXT:    ldp x25, x26, [sp, #112]
3887; CHECK-GI-NEXT:    mov x20, x3
3888; CHECK-GI-NEXT:    mov x2, x6
3889; CHECK-GI-NEXT:    mov x3, x7
3890; CHECK-GI-NEXT:    mov x21, x4
3891; CHECK-GI-NEXT:    mov x22, x5
3892; CHECK-GI-NEXT:    bl __modti3
3893; CHECK-GI-NEXT:    mov x27, x0
3894; CHECK-GI-NEXT:    mov x28, x1
3895; CHECK-GI-NEXT:    mov x0, x19
3896; CHECK-GI-NEXT:    mov x1, x20
3897; CHECK-GI-NEXT:    mov x2, x23
3898; CHECK-GI-NEXT:    mov x3, x24
3899; CHECK-GI-NEXT:    bl __modti3
3900; CHECK-GI-NEXT:    mov x19, x0
3901; CHECK-GI-NEXT:    mov x20, x1
3902; CHECK-GI-NEXT:    mov x0, x21
3903; CHECK-GI-NEXT:    mov x1, x22
3904; CHECK-GI-NEXT:    mov x2, x25
3905; CHECK-GI-NEXT:    mov x3, x26
3906; CHECK-GI-NEXT:    bl __modti3
3907; CHECK-GI-NEXT:    mov x4, x0
3908; CHECK-GI-NEXT:    mov x5, x1
3909; CHECK-GI-NEXT:    mov x0, x27
3910; CHECK-GI-NEXT:    mov x1, x28
3911; CHECK-GI-NEXT:    mov x2, x19
3912; CHECK-GI-NEXT:    mov x3, x20
3913; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
3914; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
3915; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
3916; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
3917; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
3918; CHECK-GI-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
3919; CHECK-GI-NEXT:    ret
3920entry:
3921  %s = srem <3 x i128> %d, %e
3922  ret <3 x i128> %s
3923}
3924
; sv4i128 - srem of two <4 x i128> vectors.
; In the expected output of both RUN lines (default llc, checked under the
; CHECK-SD prefix, and -global-isel, checked under CHECK-GI) the operation
; becomes four calls to __modti3 inside a 128-byte frame (sub sp, sp, #128).
; The x2/x3 operands of the calls are loaded from the stack starting at
; [sp, #128]; operands needed only by the fourth call are parked in the spill
; slots at [sp, #8] and [sp, #16] and reloaded just before that call.
; x29 is saved and then used to hold a value across calls; the expected code
; never sets it up as a frame pointer. The four results are moved into x0-x7
; before the return.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file), so regenerate them instead of editing them by hand.
3925define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) {
3926; CHECK-SD-LABEL: sv4i128:
3927; CHECK-SD:       // %bb.0: // %entry
3928; CHECK-SD-NEXT:    sub sp, sp, #128
3929; CHECK-SD-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
3930; CHECK-SD-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
3931; CHECK-SD-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
3932; CHECK-SD-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
3933; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
3934; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
3935; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
3936; CHECK-SD-NEXT:    .cfi_offset w19, -8
3937; CHECK-SD-NEXT:    .cfi_offset w20, -16
3938; CHECK-SD-NEXT:    .cfi_offset w21, -24
3939; CHECK-SD-NEXT:    .cfi_offset w22, -32
3940; CHECK-SD-NEXT:    .cfi_offset w23, -40
3941; CHECK-SD-NEXT:    .cfi_offset w24, -48
3942; CHECK-SD-NEXT:    .cfi_offset w25, -56
3943; CHECK-SD-NEXT:    .cfi_offset w26, -64
3944; CHECK-SD-NEXT:    .cfi_offset w27, -72
3945; CHECK-SD-NEXT:    .cfi_offset w28, -80
3946; CHECK-SD-NEXT:    .cfi_offset w30, -88
3947; CHECK-SD-NEXT:    .cfi_offset w29, -96
3948; CHECK-SD-NEXT:    mov x23, x3
3949; CHECK-SD-NEXT:    mov x24, x2
3950; CHECK-SD-NEXT:    stp x6, x7, [sp, #16] // 16-byte Folded Spill
3951; CHECK-SD-NEXT:    ldp x8, x26, [sp, #176]
3952; CHECK-SD-NEXT:    mov x21, x5
3953; CHECK-SD-NEXT:    ldp x2, x3, [sp, #128]
3954; CHECK-SD-NEXT:    mov x22, x4
3955; CHECK-SD-NEXT:    ldp x27, x28, [sp, #160]
3956; CHECK-SD-NEXT:    ldp x29, x19, [sp, #144]
3957; CHECK-SD-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
3958; CHECK-SD-NEXT:    bl __modti3
3959; CHECK-SD-NEXT:    mov x20, x0
3960; CHECK-SD-NEXT:    mov x25, x1
3961; CHECK-SD-NEXT:    mov x0, x24
3962; CHECK-SD-NEXT:    mov x1, x23
3963; CHECK-SD-NEXT:    mov x2, x29
3964; CHECK-SD-NEXT:    mov x3, x19
3965; CHECK-SD-NEXT:    bl __modti3
3966; CHECK-SD-NEXT:    mov x19, x0
3967; CHECK-SD-NEXT:    mov x23, x1
3968; CHECK-SD-NEXT:    mov x0, x22
3969; CHECK-SD-NEXT:    mov x1, x21
3970; CHECK-SD-NEXT:    mov x2, x27
3971; CHECK-SD-NEXT:    mov x3, x28
3972; CHECK-SD-NEXT:    bl __modti3
3973; CHECK-SD-NEXT:    mov x21, x0
3974; CHECK-SD-NEXT:    mov x22, x1
3975; CHECK-SD-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
3976; CHECK-SD-NEXT:    ldp x0, x1, [sp, #16] // 16-byte Folded Reload
3977; CHECK-SD-NEXT:    mov x3, x26
3978; CHECK-SD-NEXT:    bl __modti3
3979; CHECK-SD-NEXT:    mov x6, x0
3980; CHECK-SD-NEXT:    mov x7, x1
3981; CHECK-SD-NEXT:    mov x0, x20
3982; CHECK-SD-NEXT:    mov x1, x25
3983; CHECK-SD-NEXT:    mov x2, x19
3984; CHECK-SD-NEXT:    mov x3, x23
3985; CHECK-SD-NEXT:    mov x4, x21
3986; CHECK-SD-NEXT:    mov x5, x22
3987; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
3988; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
3989; CHECK-SD-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
3990; CHECK-SD-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
3991; CHECK-SD-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
3992; CHECK-SD-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
3993; CHECK-SD-NEXT:    add sp, sp, #128
3994; CHECK-SD-NEXT:    ret
3995;
3996; CHECK-GI-LABEL: sv4i128:
3997; CHECK-GI:       // %bb.0: // %entry
3998; CHECK-GI-NEXT:    sub sp, sp, #128
3999; CHECK-GI-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
4000; CHECK-GI-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
4001; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
4002; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
4003; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
4004; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
4005; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
4006; CHECK-GI-NEXT:    .cfi_offset w19, -8
4007; CHECK-GI-NEXT:    .cfi_offset w20, -16
4008; CHECK-GI-NEXT:    .cfi_offset w21, -24
4009; CHECK-GI-NEXT:    .cfi_offset w22, -32
4010; CHECK-GI-NEXT:    .cfi_offset w23, -40
4011; CHECK-GI-NEXT:    .cfi_offset w24, -48
4012; CHECK-GI-NEXT:    .cfi_offset w25, -56
4013; CHECK-GI-NEXT:    .cfi_offset w26, -64
4014; CHECK-GI-NEXT:    .cfi_offset w27, -72
4015; CHECK-GI-NEXT:    .cfi_offset w28, -80
4016; CHECK-GI-NEXT:    .cfi_offset w30, -88
4017; CHECK-GI-NEXT:    .cfi_offset w29, -96
4018; CHECK-GI-NEXT:    mov x19, x2
4019; CHECK-GI-NEXT:    mov x20, x3
4020; CHECK-GI-NEXT:    mov x21, x4
4021; CHECK-GI-NEXT:    ldp x2, x3, [sp, #128]
4022; CHECK-GI-NEXT:    mov x22, x5
4023; CHECK-GI-NEXT:    ldp x9, x8, [sp, #176]
4024; CHECK-GI-NEXT:    mov x23, x7
4025; CHECK-GI-NEXT:    ldp x24, x25, [sp, #144]
4026; CHECK-GI-NEXT:    ldp x26, x27, [sp, #160]
4027; CHECK-GI-NEXT:    stp x9, x6, [sp, #16] // 16-byte Folded Spill
4028; CHECK-GI-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
4029; CHECK-GI-NEXT:    bl __modti3
4030; CHECK-GI-NEXT:    mov x28, x0
4031; CHECK-GI-NEXT:    mov x29, x1
4032; CHECK-GI-NEXT:    mov x0, x19
4033; CHECK-GI-NEXT:    mov x1, x20
4034; CHECK-GI-NEXT:    mov x2, x24
4035; CHECK-GI-NEXT:    mov x3, x25
4036; CHECK-GI-NEXT:    bl __modti3
4037; CHECK-GI-NEXT:    mov x19, x0
4038; CHECK-GI-NEXT:    mov x20, x1
4039; CHECK-GI-NEXT:    mov x0, x21
4040; CHECK-GI-NEXT:    mov x1, x22
4041; CHECK-GI-NEXT:    mov x2, x26
4042; CHECK-GI-NEXT:    mov x3, x27
4043; CHECK-GI-NEXT:    bl __modti3
4044; CHECK-GI-NEXT:    mov x21, x0
4045; CHECK-GI-NEXT:    ldp x2, x0, [sp, #16] // 16-byte Folded Reload
4046; CHECK-GI-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
4047; CHECK-GI-NEXT:    mov x22, x1
4048; CHECK-GI-NEXT:    mov x1, x23
4049; CHECK-GI-NEXT:    bl __modti3
4050; CHECK-GI-NEXT:    mov x6, x0
4051; CHECK-GI-NEXT:    mov x7, x1
4052; CHECK-GI-NEXT:    mov x0, x28
4053; CHECK-GI-NEXT:    mov x1, x29
4054; CHECK-GI-NEXT:    mov x2, x19
4055; CHECK-GI-NEXT:    mov x3, x20
4056; CHECK-GI-NEXT:    mov x4, x21
4057; CHECK-GI-NEXT:    mov x5, x22
4058; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
4059; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
4060; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
4061; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
4062; CHECK-GI-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
4063; CHECK-GI-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
4064; CHECK-GI-NEXT:    add sp, sp, #128
4065; CHECK-GI-NEXT:    ret
4066entry:
4067  %s = srem <4 x i128> %d, %e
4068  ret <4 x i128> %s
4069}
4070
; uv2i128 - urem of two <2 x i128> vectors.
; In the expected output of both RUN lines (default llc, checked under the
; CHECK-SD prefix, and -global-isel, checked under CHECK-GI) the operation
; becomes two calls to __umodti3 inside a 64-byte frame. x4/x5 are forwarded
; as x2/x3 of the first call; x2/x3 and x6/x7 are copied into saved registers
; first and then passed as x0/x1 and x2/x3 of the second call. The first
; result is parked in x23/x24 across the second call, and the two results are
; moved into x0-x3 before the return.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file), so regenerate them instead of editing them by hand.
4071define <2 x i128> @uv2i128(<2 x i128> %d, <2 x i128> %e) {
4072; CHECK-SD-LABEL: uv2i128:
4073; CHECK-SD:       // %bb.0: // %entry
4074; CHECK-SD-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
4075; CHECK-SD-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
4076; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
4077; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
4078; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
4079; CHECK-SD-NEXT:    .cfi_offset w19, -8
4080; CHECK-SD-NEXT:    .cfi_offset w20, -16
4081; CHECK-SD-NEXT:    .cfi_offset w21, -24
4082; CHECK-SD-NEXT:    .cfi_offset w22, -32
4083; CHECK-SD-NEXT:    .cfi_offset w23, -40
4084; CHECK-SD-NEXT:    .cfi_offset w24, -48
4085; CHECK-SD-NEXT:    .cfi_offset w30, -64
4086; CHECK-SD-NEXT:    mov x21, x3
4087; CHECK-SD-NEXT:    mov x22, x2
4088; CHECK-SD-NEXT:    mov x2, x4
4089; CHECK-SD-NEXT:    mov x3, x5
4090; CHECK-SD-NEXT:    mov x19, x7
4091; CHECK-SD-NEXT:    mov x20, x6
4092; CHECK-SD-NEXT:    bl __umodti3
4093; CHECK-SD-NEXT:    mov x23, x0
4094; CHECK-SD-NEXT:    mov x24, x1
4095; CHECK-SD-NEXT:    mov x0, x22
4096; CHECK-SD-NEXT:    mov x1, x21
4097; CHECK-SD-NEXT:    mov x2, x20
4098; CHECK-SD-NEXT:    mov x3, x19
4099; CHECK-SD-NEXT:    bl __umodti3
4100; CHECK-SD-NEXT:    mov x2, x0
4101; CHECK-SD-NEXT:    mov x3, x1
4102; CHECK-SD-NEXT:    mov x0, x23
4103; CHECK-SD-NEXT:    mov x1, x24
4104; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
4105; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
4106; CHECK-SD-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
4107; CHECK-SD-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
4108; CHECK-SD-NEXT:    ret
4109;
4110; CHECK-GI-LABEL: uv2i128:
4111; CHECK-GI:       // %bb.0: // %entry
4112; CHECK-GI-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
4113; CHECK-GI-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
4114; CHECK-GI-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
4115; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
4116; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
4117; CHECK-GI-NEXT:    .cfi_offset w19, -8
4118; CHECK-GI-NEXT:    .cfi_offset w20, -16
4119; CHECK-GI-NEXT:    .cfi_offset w21, -24
4120; CHECK-GI-NEXT:    .cfi_offset w22, -32
4121; CHECK-GI-NEXT:    .cfi_offset w23, -40
4122; CHECK-GI-NEXT:    .cfi_offset w24, -48
4123; CHECK-GI-NEXT:    .cfi_offset w30, -64
4124; CHECK-GI-NEXT:    mov x19, x2
4125; CHECK-GI-NEXT:    mov x20, x3
4126; CHECK-GI-NEXT:    mov x2, x4
4127; CHECK-GI-NEXT:    mov x3, x5
4128; CHECK-GI-NEXT:    mov x21, x6
4129; CHECK-GI-NEXT:    mov x22, x7
4130; CHECK-GI-NEXT:    bl __umodti3
4131; CHECK-GI-NEXT:    mov x23, x0
4132; CHECK-GI-NEXT:    mov x24, x1
4133; CHECK-GI-NEXT:    mov x0, x19
4134; CHECK-GI-NEXT:    mov x1, x20
4135; CHECK-GI-NEXT:    mov x2, x21
4136; CHECK-GI-NEXT:    mov x3, x22
4137; CHECK-GI-NEXT:    bl __umodti3
4138; CHECK-GI-NEXT:    mov x2, x0
4139; CHECK-GI-NEXT:    mov x3, x1
4140; CHECK-GI-NEXT:    mov x0, x23
4141; CHECK-GI-NEXT:    mov x1, x24
4142; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
4143; CHECK-GI-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
4144; CHECK-GI-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
4145; CHECK-GI-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
4146; CHECK-GI-NEXT:    ret
4147entry:
4148  %s = urem <2 x i128> %d, %e
4149  ret <2 x i128> %s
4150}
4151
; uv3i128 - urem of two <3 x i128> vectors.
; In the expected output of both RUN lines (default llc, checked under the
; CHECK-SD prefix, and -global-isel, checked under CHECK-GI) the operation
; becomes three calls to __umodti3. x6/x7 are forwarded as x2/x3 of the first
; call, while the x2/x3 operands of the other two calls are loaded from
; [sp, #96] and [sp, #112] once the 96-byte frame has been set up.
; Values that must survive a call are held in registers from x19-x28, all of
; which are saved and restored by the frame, and the three results are moved
; into x0-x5 before the return. The expected instruction sequence mirrors the
; one for sv3i128, with __umodti3 in place of __modti3.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file), so regenerate them instead of editing them by hand.
4152define <3 x i128> @uv3i128(<3 x i128> %d, <3 x i128> %e) {
4153; CHECK-SD-LABEL: uv3i128:
4154; CHECK-SD:       // %bb.0: // %entry
4155; CHECK-SD-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
4156; CHECK-SD-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
4157; CHECK-SD-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
4158; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
4159; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
4160; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
4161; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
4162; CHECK-SD-NEXT:    .cfi_offset w19, -8
4163; CHECK-SD-NEXT:    .cfi_offset w20, -16
4164; CHECK-SD-NEXT:    .cfi_offset w21, -24
4165; CHECK-SD-NEXT:    .cfi_offset w22, -32
4166; CHECK-SD-NEXT:    .cfi_offset w23, -40
4167; CHECK-SD-NEXT:    .cfi_offset w24, -48
4168; CHECK-SD-NEXT:    .cfi_offset w25, -56
4169; CHECK-SD-NEXT:    .cfi_offset w26, -64
4170; CHECK-SD-NEXT:    .cfi_offset w27, -72
4171; CHECK-SD-NEXT:    .cfi_offset w28, -80
4172; CHECK-SD-NEXT:    .cfi_offset w30, -96
4173; CHECK-SD-NEXT:    ldp x23, x24, [sp, #112]
4174; CHECK-SD-NEXT:    mov x21, x3
4175; CHECK-SD-NEXT:    ldp x25, x26, [sp, #96]
4176; CHECK-SD-NEXT:    mov x22, x2
4177; CHECK-SD-NEXT:    mov x2, x6
4178; CHECK-SD-NEXT:    mov x3, x7
4179; CHECK-SD-NEXT:    mov x19, x5
4180; CHECK-SD-NEXT:    mov x20, x4
4181; CHECK-SD-NEXT:    bl __umodti3
4182; CHECK-SD-NEXT:    mov x27, x0
4183; CHECK-SD-NEXT:    mov x28, x1
4184; CHECK-SD-NEXT:    mov x0, x22
4185; CHECK-SD-NEXT:    mov x1, x21
4186; CHECK-SD-NEXT:    mov x2, x25
4187; CHECK-SD-NEXT:    mov x3, x26
4188; CHECK-SD-NEXT:    bl __umodti3
4189; CHECK-SD-NEXT:    mov x21, x0
4190; CHECK-SD-NEXT:    mov x22, x1
4191; CHECK-SD-NEXT:    mov x0, x20
4192; CHECK-SD-NEXT:    mov x1, x19
4193; CHECK-SD-NEXT:    mov x2, x23
4194; CHECK-SD-NEXT:    mov x3, x24
4195; CHECK-SD-NEXT:    bl __umodti3
4196; CHECK-SD-NEXT:    mov x4, x0
4197; CHECK-SD-NEXT:    mov x5, x1
4198; CHECK-SD-NEXT:    mov x0, x27
4199; CHECK-SD-NEXT:    mov x1, x28
4200; CHECK-SD-NEXT:    mov x2, x21
4201; CHECK-SD-NEXT:    mov x3, x22
4202; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
4203; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
4204; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
4205; CHECK-SD-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
4206; CHECK-SD-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
4207; CHECK-SD-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
4208; CHECK-SD-NEXT:    ret
4209;
4210; CHECK-GI-LABEL: uv3i128:
4211; CHECK-GI:       // %bb.0: // %entry
4212; CHECK-GI-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
4213; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
4214; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
4215; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
4216; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
4217; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
4218; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
4219; CHECK-GI-NEXT:    .cfi_offset w19, -8
4220; CHECK-GI-NEXT:    .cfi_offset w20, -16
4221; CHECK-GI-NEXT:    .cfi_offset w21, -24
4222; CHECK-GI-NEXT:    .cfi_offset w22, -32
4223; CHECK-GI-NEXT:    .cfi_offset w23, -40
4224; CHECK-GI-NEXT:    .cfi_offset w24, -48
4225; CHECK-GI-NEXT:    .cfi_offset w25, -56
4226; CHECK-GI-NEXT:    .cfi_offset w26, -64
4227; CHECK-GI-NEXT:    .cfi_offset w27, -72
4228; CHECK-GI-NEXT:    .cfi_offset w28, -80
4229; CHECK-GI-NEXT:    .cfi_offset w30, -96
4230; CHECK-GI-NEXT:    ldp x23, x24, [sp, #96]
4231; CHECK-GI-NEXT:    mov x19, x2
4232; CHECK-GI-NEXT:    ldp x25, x26, [sp, #112]
4233; CHECK-GI-NEXT:    mov x20, x3
4234; CHECK-GI-NEXT:    mov x2, x6
4235; CHECK-GI-NEXT:    mov x3, x7
4236; CHECK-GI-NEXT:    mov x21, x4
4237; CHECK-GI-NEXT:    mov x22, x5
4238; CHECK-GI-NEXT:    bl __umodti3
4239; CHECK-GI-NEXT:    mov x27, x0
4240; CHECK-GI-NEXT:    mov x28, x1
4241; CHECK-GI-NEXT:    mov x0, x19
4242; CHECK-GI-NEXT:    mov x1, x20
4243; CHECK-GI-NEXT:    mov x2, x23
4244; CHECK-GI-NEXT:    mov x3, x24
4245; CHECK-GI-NEXT:    bl __umodti3
4246; CHECK-GI-NEXT:    mov x19, x0
4247; CHECK-GI-NEXT:    mov x20, x1
4248; CHECK-GI-NEXT:    mov x0, x21
4249; CHECK-GI-NEXT:    mov x1, x22
4250; CHECK-GI-NEXT:    mov x2, x25
4251; CHECK-GI-NEXT:    mov x3, x26
4252; CHECK-GI-NEXT:    bl __umodti3
4253; CHECK-GI-NEXT:    mov x4, x0
4254; CHECK-GI-NEXT:    mov x5, x1
4255; CHECK-GI-NEXT:    mov x0, x27
4256; CHECK-GI-NEXT:    mov x1, x28
4257; CHECK-GI-NEXT:    mov x2, x19
4258; CHECK-GI-NEXT:    mov x3, x20
4259; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
4260; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
4261; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
4262; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
4263; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
4264; CHECK-GI-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
4265; CHECK-GI-NEXT:    ret
4266entry:
4267  %s = urem <3 x i128> %d, %e
4268  ret <3 x i128> %s
4269}
4270
; uv4i128 - urem of two <4 x i128> vectors.
; In the expected output of both RUN lines (default llc, checked under the
; CHECK-SD prefix, and -global-isel, checked under CHECK-GI) the operation
; becomes four calls to __umodti3 inside a 128-byte frame (sub sp, sp, #128).
; The x2/x3 operands of the calls are loaded from the stack starting at
; [sp, #128]; operands needed only by the fourth call are parked in the spill
; slots at [sp, #8] and [sp, #16] and reloaded just before that call.
; x29 is saved and then used to hold a value across calls; the expected code
; never sets it up as a frame pointer. The four results are moved into x0-x7
; before the return. The expected instruction sequence mirrors the one for
; sv4i128, with __umodti3 in place of __modti3.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file), so regenerate them instead of editing them by hand.
4271define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) {
4272; CHECK-SD-LABEL: uv4i128:
4273; CHECK-SD:       // %bb.0: // %entry
4274; CHECK-SD-NEXT:    sub sp, sp, #128
4275; CHECK-SD-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
4276; CHECK-SD-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
4277; CHECK-SD-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
4278; CHECK-SD-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
4279; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
4280; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
4281; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
4282; CHECK-SD-NEXT:    .cfi_offset w19, -8
4283; CHECK-SD-NEXT:    .cfi_offset w20, -16
4284; CHECK-SD-NEXT:    .cfi_offset w21, -24
4285; CHECK-SD-NEXT:    .cfi_offset w22, -32
4286; CHECK-SD-NEXT:    .cfi_offset w23, -40
4287; CHECK-SD-NEXT:    .cfi_offset w24, -48
4288; CHECK-SD-NEXT:    .cfi_offset w25, -56
4289; CHECK-SD-NEXT:    .cfi_offset w26, -64
4290; CHECK-SD-NEXT:    .cfi_offset w27, -72
4291; CHECK-SD-NEXT:    .cfi_offset w28, -80
4292; CHECK-SD-NEXT:    .cfi_offset w30, -88
4293; CHECK-SD-NEXT:    .cfi_offset w29, -96
4294; CHECK-SD-NEXT:    mov x23, x3
4295; CHECK-SD-NEXT:    mov x24, x2
4296; CHECK-SD-NEXT:    stp x6, x7, [sp, #16] // 16-byte Folded Spill
4297; CHECK-SD-NEXT:    ldp x8, x26, [sp, #176]
4298; CHECK-SD-NEXT:    mov x21, x5
4299; CHECK-SD-NEXT:    ldp x2, x3, [sp, #128]
4300; CHECK-SD-NEXT:    mov x22, x4
4301; CHECK-SD-NEXT:    ldp x27, x28, [sp, #160]
4302; CHECK-SD-NEXT:    ldp x29, x19, [sp, #144]
4303; CHECK-SD-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
4304; CHECK-SD-NEXT:    bl __umodti3
4305; CHECK-SD-NEXT:    mov x20, x0
4306; CHECK-SD-NEXT:    mov x25, x1
4307; CHECK-SD-NEXT:    mov x0, x24
4308; CHECK-SD-NEXT:    mov x1, x23
4309; CHECK-SD-NEXT:    mov x2, x29
4310; CHECK-SD-NEXT:    mov x3, x19
4311; CHECK-SD-NEXT:    bl __umodti3
4312; CHECK-SD-NEXT:    mov x19, x0
4313; CHECK-SD-NEXT:    mov x23, x1
4314; CHECK-SD-NEXT:    mov x0, x22
4315; CHECK-SD-NEXT:    mov x1, x21
4316; CHECK-SD-NEXT:    mov x2, x27
4317; CHECK-SD-NEXT:    mov x3, x28
4318; CHECK-SD-NEXT:    bl __umodti3
4319; CHECK-SD-NEXT:    mov x21, x0
4320; CHECK-SD-NEXT:    mov x22, x1
4321; CHECK-SD-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
4322; CHECK-SD-NEXT:    ldp x0, x1, [sp, #16] // 16-byte Folded Reload
4323; CHECK-SD-NEXT:    mov x3, x26
4324; CHECK-SD-NEXT:    bl __umodti3
4325; CHECK-SD-NEXT:    mov x6, x0
4326; CHECK-SD-NEXT:    mov x7, x1
4327; CHECK-SD-NEXT:    mov x0, x20
4328; CHECK-SD-NEXT:    mov x1, x25
4329; CHECK-SD-NEXT:    mov x2, x19
4330; CHECK-SD-NEXT:    mov x3, x23
4331; CHECK-SD-NEXT:    mov x4, x21
4332; CHECK-SD-NEXT:    mov x5, x22
4333; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
4334; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
4335; CHECK-SD-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
4336; CHECK-SD-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
4337; CHECK-SD-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
4338; CHECK-SD-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
4339; CHECK-SD-NEXT:    add sp, sp, #128
4340; CHECK-SD-NEXT:    ret
4341;
4342; CHECK-GI-LABEL: uv4i128:
4343; CHECK-GI:       // %bb.0: // %entry
4344; CHECK-GI-NEXT:    sub sp, sp, #128
4345; CHECK-GI-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
4346; CHECK-GI-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
4347; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
4348; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
4349; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
4350; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
4351; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
4352; CHECK-GI-NEXT:    .cfi_offset w19, -8
4353; CHECK-GI-NEXT:    .cfi_offset w20, -16
4354; CHECK-GI-NEXT:    .cfi_offset w21, -24
4355; CHECK-GI-NEXT:    .cfi_offset w22, -32
4356; CHECK-GI-NEXT:    .cfi_offset w23, -40
4357; CHECK-GI-NEXT:    .cfi_offset w24, -48
4358; CHECK-GI-NEXT:    .cfi_offset w25, -56
4359; CHECK-GI-NEXT:    .cfi_offset w26, -64
4360; CHECK-GI-NEXT:    .cfi_offset w27, -72
4361; CHECK-GI-NEXT:    .cfi_offset w28, -80
4362; CHECK-GI-NEXT:    .cfi_offset w30, -88
4363; CHECK-GI-NEXT:    .cfi_offset w29, -96
4364; CHECK-GI-NEXT:    mov x19, x2
4365; CHECK-GI-NEXT:    mov x20, x3
4366; CHECK-GI-NEXT:    mov x21, x4
4367; CHECK-GI-NEXT:    ldp x2, x3, [sp, #128]
4368; CHECK-GI-NEXT:    mov x22, x5
4369; CHECK-GI-NEXT:    ldp x9, x8, [sp, #176]
4370; CHECK-GI-NEXT:    mov x23, x7
4371; CHECK-GI-NEXT:    ldp x24, x25, [sp, #144]
4372; CHECK-GI-NEXT:    ldp x26, x27, [sp, #160]
4373; CHECK-GI-NEXT:    stp x9, x6, [sp, #16] // 16-byte Folded Spill
4374; CHECK-GI-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
4375; CHECK-GI-NEXT:    bl __umodti3
4376; CHECK-GI-NEXT:    mov x28, x0
4377; CHECK-GI-NEXT:    mov x29, x1
4378; CHECK-GI-NEXT:    mov x0, x19
4379; CHECK-GI-NEXT:    mov x1, x20
4380; CHECK-GI-NEXT:    mov x2, x24
4381; CHECK-GI-NEXT:    mov x3, x25
4382; CHECK-GI-NEXT:    bl __umodti3
4383; CHECK-GI-NEXT:    mov x19, x0
4384; CHECK-GI-NEXT:    mov x20, x1
4385; CHECK-GI-NEXT:    mov x0, x21
4386; CHECK-GI-NEXT:    mov x1, x22
4387; CHECK-GI-NEXT:    mov x2, x26
4388; CHECK-GI-NEXT:    mov x3, x27
4389; CHECK-GI-NEXT:    bl __umodti3
4390; CHECK-GI-NEXT:    mov x21, x0
4391; CHECK-GI-NEXT:    ldp x2, x0, [sp, #16] // 16-byte Folded Reload
4392; CHECK-GI-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
4393; CHECK-GI-NEXT:    mov x22, x1
4394; CHECK-GI-NEXT:    mov x1, x23
4395; CHECK-GI-NEXT:    bl __umodti3
4396; CHECK-GI-NEXT:    mov x6, x0
4397; CHECK-GI-NEXT:    mov x7, x1
4398; CHECK-GI-NEXT:    mov x0, x28
4399; CHECK-GI-NEXT:    mov x1, x29
4400; CHECK-GI-NEXT:    mov x2, x19
4401; CHECK-GI-NEXT:    mov x3, x20
4402; CHECK-GI-NEXT:    mov x4, x21
4403; CHECK-GI-NEXT:    mov x5, x22
4404; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
4405; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
4406; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
4407; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
4408; CHECK-GI-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
4409; CHECK-GI-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
4410; CHECK-GI-NEXT:    add sp, sp, #128
4411; CHECK-GI-NEXT:    ret
4412entry:
4413  %s = urem <4 x i128> %d, %e
4414  ret <4 x i128> %s
4415}
4416