xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6target triple = "aarch64-unknown-linux-gnu"
7
8;
9; SREM
10;
11
; srem_v4i8: signed remainder of two <4 x i8> vectors passed and returned by value.
; Expected SVE2/SME code: sign-extend the byte values in place (sxtb on .h lanes),
; widen both operands to .s lanes (sunpklo), divide with a predicated sdivr, narrow
; the quotient back to .h with uzp1 and form the remainder with mls.
; Expected scalar fallback (the run line with no -mattr): spill both operands to
; the stack and compute each of the four lanes with ldrsb + sdiv + msub, storing
; halfword results before reloading d0.
12define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
13; CHECK-LABEL: srem_v4i8:
14; CHECK:       // %bb.0:
15; CHECK-NEXT:    ptrue p0.h, vl4
16; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
17; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
18; CHECK-NEXT:    ptrue p1.s, vl4
19; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
20; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
21; CHECK-NEXT:    sunpklo z2.s, z1.h
22; CHECK-NEXT:    sunpklo z3.s, z0.h
23; CHECK-NEXT:    sdivr z2.s, p1/m, z2.s, z3.s
24; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
25; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
26; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
27; CHECK-NEXT:    ret
28;
29; NONEON-NOSVE-LABEL: srem_v4i8:
30; NONEON-NOSVE:       // %bb.0:
31; NONEON-NOSVE-NEXT:    sub sp, sp, #32
32; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
33; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
34; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
35; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
36; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #20]
37; NONEON-NOSVE-NEXT:    ldrsb w12, [sp, #12]
38; NONEON-NOSVE-NEXT:    ldrsb w14, [sp, #18]
39; NONEON-NOSVE-NEXT:    ldrsb w15, [sp, #10]
40; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
41; NONEON-NOSVE-NEXT:    ldrsb w17, [sp, #16]
42; NONEON-NOSVE-NEXT:    ldrsb w18, [sp, #8]
43; NONEON-NOSVE-NEXT:    sdiv w13, w12, w11
44; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
45; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
46; NONEON-NOSVE-NEXT:    sdiv w16, w15, w14
47; NONEON-NOSVE-NEXT:    msub w9, w13, w11, w12
48; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
49; NONEON-NOSVE-NEXT:    sdiv w0, w18, w17
50; NONEON-NOSVE-NEXT:    msub w10, w16, w14, w15
51; NONEON-NOSVE-NEXT:    strh w10, [sp, #26]
52; NONEON-NOSVE-NEXT:    msub w8, w0, w17, w18
53; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
54; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
55; NONEON-NOSVE-NEXT:    add sp, sp, #32
56; NONEON-NOSVE-NEXT:    ret
57  %res = srem <4 x i8> %op1, %op2
58  ret <4 x i8> %res
59}
60
; srem_v8i8: signed remainder of two <8 x i8> vectors passed and returned by value.
; Expected SVE2/SME code: unpack bytes to .h and then to .s in two groups of four
; lanes (ext #8 selects the upper group), run two predicated sdivr operations,
; narrow the quotients with uzp1/splice back to .b and form the remainder with a
; single mls under a vl8 byte predicate.
; Expected scalar fallback (the run line with no -mattr): spill both operands to
; the stack and compute each of the eight lanes with ldrsb + sdiv + msub + strb.
61define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
62; CHECK-LABEL: srem_v8i8:
63; CHECK:       // %bb.0:
64; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
65; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
66; CHECK-NEXT:    sunpklo z2.h, z1.b
67; CHECK-NEXT:    sunpklo z3.h, z0.b
68; CHECK-NEXT:    ptrue p0.s, vl4
69; CHECK-NEXT:    sunpklo z4.s, z2.h
70; CHECK-NEXT:    sunpklo z5.s, z3.h
71; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
72; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
73; CHECK-NEXT:    sunpklo z2.s, z2.h
74; CHECK-NEXT:    sunpklo z3.s, z3.h
75; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
76; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
77; CHECK-NEXT:    ptrue p0.h, vl4
78; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
79; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
80; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
81; CHECK-NEXT:    ptrue p0.b, vl8
82; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
83; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
84; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
85; CHECK-NEXT:    ret
86;
87; NONEON-NOSVE-LABEL: srem_v8i8:
88; NONEON-NOSVE:       // %bb.0:
89; NONEON-NOSVE-NEXT:    sub sp, sp, #32
90; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
91; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
92; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #23]
93; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
94; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
95; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
96; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
97; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
98; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
99; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
100; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
101; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
102; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
103; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #21]
104; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
105; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
106; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
107; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
108; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #20]
109; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
110; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
111; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
112; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
113; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #19]
114; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
115; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
116; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
117; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
118; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #18]
119; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
120; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
121; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
122; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
123; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
124; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
125; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
126; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
127; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
128; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #16]
129; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
130; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
131; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
132; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
133; NONEON-NOSVE-NEXT:    add sp, sp, #32
134; NONEON-NOSVE-NEXT:    ret
135  %res = srem <8 x i8> %op1, %op2
136  ret <8 x i8> %res
137}
138
; srem_v16i8: signed remainder of two <16 x i8> vectors passed and returned by value.
; Expected SVE2/SME code: the sixteen byte lanes are widened to .s in four groups
; of four (sunpklo plus ext #8 to reach upper halves), divided with four predicated
; sdivr operations, narrowed back to .b through uzp1/splice, and the remainder is
; formed with a single mls under a vl16 byte predicate.
; Expected scalar fallback (the run line with no -mattr): push both operands with
; a pre-indexed stp (48-byte frame) and compute each of the sixteen lanes with
; ldrsb + sdiv + msub + strb before reloading q0.
139define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
140; CHECK-LABEL: srem_v16i8:
141; CHECK:       // %bb.0:
142; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
143; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
144; CHECK-NEXT:    sunpklo z2.h, z1.b
145; CHECK-NEXT:    sunpklo z3.h, z0.b
146; CHECK-NEXT:    ptrue p0.s, vl4
147; CHECK-NEXT:    sunpklo z4.s, z2.h
148; CHECK-NEXT:    sunpklo z5.s, z3.h
149; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
150; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
151; CHECK-NEXT:    sunpklo z2.s, z2.h
152; CHECK-NEXT:    sunpklo z3.s, z3.h
153; CHECK-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
154; CHECK-NEXT:    mov z5.d, z0.d
155; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
156; CHECK-NEXT:    sunpklo z5.h, z5.b
157; CHECK-NEXT:    sunpklo z7.s, z5.h
158; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
159; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
160; CHECK-NEXT:    mov z3.d, z1.d
161; CHECK-NEXT:    sunpklo z5.s, z5.h
162; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
163; CHECK-NEXT:    sunpklo z3.h, z3.b
164; CHECK-NEXT:    sunpklo z6.s, z3.h
165; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
166; CHECK-NEXT:    sunpklo z3.s, z3.h
167; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
168; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
169; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
170; CHECK-NEXT:    ptrue p0.h, vl4
171; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
172; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
173; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
174; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
175; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
176; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
177; CHECK-NEXT:    ptrue p0.b, vl8
178; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
179; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
180; CHECK-NEXT:    ptrue p0.b, vl16
181; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
182; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
183; CHECK-NEXT:    ret
184;
185; NONEON-NOSVE-LABEL: srem_v16i8:
186; NONEON-NOSVE:       // %bb.0:
187; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
188; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
189; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #31]
190; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
191; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
192; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
193; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
194; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
195; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #30]
196; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
197; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
198; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
199; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
200; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #29]
201; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
202; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
203; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
204; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
205; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #28]
206; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
207; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
208; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
209; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
210; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #27]
211; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
212; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
213; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
214; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
215; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #26]
216; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
217; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
218; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
219; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
220; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #25]
221; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
222; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
223; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
224; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
225; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #24]
226; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
227; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
228; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
229; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
230; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #23]
231; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
232; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
233; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
234; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
235; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
236; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
237; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
238; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
239; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
240; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #21]
241; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
242; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
243; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
244; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
245; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #20]
246; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
247; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
248; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
249; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
250; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #19]
251; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
252; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
253; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
254; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
255; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #18]
256; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
257; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
258; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
259; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
260; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
261; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
262; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
263; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
264; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
265; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #16]
266; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
267; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
268; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
269; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
270; NONEON-NOSVE-NEXT:    add sp, sp, #48
271; NONEON-NOSVE-NEXT:    ret
272  %res = srem <16 x i8> %op1, %op2
273  ret <16 x i8> %res
274}
275
; srem_v32i8: loads <32 x i8> from %a and from %b, computes the signed remainder
; and stores the result back to %a (the function returns void).
; Expected SVE2/SME code: the data is handled as two 16-byte halves (q loads at
; offsets 0 and 16); each half is widened to .s in groups of four lanes, giving
; eight predicated divides (sdivr, or movprfx + sdiv), the quotients are narrowed
; back to .b with uzp1/splice, and the remainders are formed with mls for one half
; and msb for the other before a single stp to [x0].
; Expected scalar fallback (the run line with no -mattr): copy both inputs into a
; 96-byte stack frame, compute each of the 32 lanes with ldrsb + sdiv + msub +
; strb, then reload the result with ldp and store it to [x0].
276define void @srem_v32i8(ptr %a, ptr %b) {
277; CHECK-LABEL: srem_v32i8:
278; CHECK:       // %bb.0:
279; CHECK-NEXT:    ldr q0, [x0, #16]
280; CHECK-NEXT:    ldr q1, [x1, #16]
281; CHECK-NEXT:    ptrue p0.s, vl4
282; CHECK-NEXT:    sunpklo z3.h, z1.b
283; CHECK-NEXT:    sunpklo z4.h, z0.b
284; CHECK-NEXT:    sunpklo z2.s, z3.h
285; CHECK-NEXT:    sunpklo z5.s, z4.h
286; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
287; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
288; CHECK-NEXT:    sunpklo z3.s, z3.h
289; CHECK-NEXT:    sunpklo z4.s, z4.h
290; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z5.s
291; CHECK-NEXT:    movprfx z5, z4
292; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z3.s
293; CHECK-NEXT:    mov z3.d, z1.d
294; CHECK-NEXT:    mov z4.d, z0.d
295; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
296; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
297; CHECK-NEXT:    sunpklo z7.h, z3.b
298; CHECK-NEXT:    sunpklo z16.h, z4.b
299; CHECK-NEXT:    sunpklo z3.s, z7.h
300; CHECK-NEXT:    sunpklo z4.s, z16.h
301; CHECK-NEXT:    ext z7.b, z7.b, z7.b, #8
302; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
303; CHECK-NEXT:    sunpklo z7.s, z7.h
304; CHECK-NEXT:    movprfx z6, z4
305; CHECK-NEXT:    sdiv z6.s, p0/m, z6.s, z3.s
306; CHECK-NEXT:    ldr q3, [x0]
307; CHECK-NEXT:    ldr q4, [x1]
308; CHECK-NEXT:    sunpklo z16.s, z16.h
309; CHECK-NEXT:    sunpklo z17.h, z4.b
310; CHECK-NEXT:    sunpklo z18.h, z3.b
311; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
312; CHECK-NEXT:    sunpklo z19.s, z17.h
313; CHECK-NEXT:    sunpklo z20.s, z18.h
314; CHECK-NEXT:    ext z17.b, z17.b, z17.b, #8
315; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
316; CHECK-NEXT:    sunpklo z17.s, z17.h
317; CHECK-NEXT:    sunpklo z18.s, z18.h
318; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
319; CHECK-NEXT:    mov z20.d, z3.d
320; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
321; CHECK-NEXT:    sunpklo z20.h, z20.b
322; CHECK-NEXT:    sunpklo z22.s, z20.h
323; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
324; CHECK-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
325; CHECK-NEXT:    mov z18.d, z4.d
326; CHECK-NEXT:    sunpklo z20.s, z20.h
327; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
328; CHECK-NEXT:    sunpklo z18.h, z18.b
329; CHECK-NEXT:    sunpklo z21.s, z18.h
330; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
331; CHECK-NEXT:    sunpklo z18.s, z18.h
332; CHECK-NEXT:    sdivr z21.s, p0/m, z21.s, z22.s
333; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
334; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
335; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
336; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
337; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z20.s
338; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
339; CHECK-NEXT:    ptrue p0.h, vl4
340; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
341; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
342; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
343; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
344; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
345; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
346; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
347; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
348; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
349; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
350; CHECK-NEXT:    ptrue p0.b, vl8
351; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
352; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
353; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
354; CHECK-NEXT:    ptrue p0.b, vl16
355; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
356; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
357; CHECK-NEXT:    stp q2, q0, [x0]
358; CHECK-NEXT:    ret
359;
360; NONEON-NOSVE-LABEL: srem_v32i8:
361; NONEON-NOSVE:       // %bb.0:
362; NONEON-NOSVE-NEXT:    sub sp, sp, #96
363; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
364; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
365; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
366; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
367; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
368; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #63]
369; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #47]
370; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
371; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
372; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #46]
373; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
374; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #62]
375; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
376; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
377; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #45]
378; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
379; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #61]
380; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
381; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
382; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #44]
383; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
384; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #60]
385; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
386; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
387; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #43]
388; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
389; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #59]
390; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
391; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
392; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #42]
393; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
394; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #58]
395; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
396; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
397; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #41]
398; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
399; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #57]
400; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
401; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
402; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #40]
403; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
404; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #56]
405; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
406; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
407; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #39]
408; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
409; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #55]
410; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
411; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
412; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #38]
413; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
414; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #54]
415; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
416; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
417; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #37]
418; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
419; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #53]
420; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
421; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
422; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #36]
423; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
424; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #52]
425; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
426; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
427; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #35]
428; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
429; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #51]
430; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
431; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
432; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #34]
433; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
434; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #50]
435; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
436; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
437; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #33]
438; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
439; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #49]
440; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
441; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
442; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #32]
443; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
444; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #48]
445; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
446; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
447; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
448; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
449; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #31]
450; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
451; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
452; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
453; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
454; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #30]
455; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
456; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
457; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
458; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
459; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #29]
460; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
461; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
462; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
463; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
464; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #28]
465; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
466; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
467; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
468; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
469; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #27]
470; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
471; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
472; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
473; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
474; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #26]
475; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
476; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
477; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
478; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
479; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #25]
480; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
481; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
482; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
483; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
484; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #24]
485; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
486; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
487; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
488; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
489; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #23]
490; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
491; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
492; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
493; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
494; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #22]
495; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
496; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
497; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
498; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
499; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #21]
500; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
501; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
502; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
503; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
504; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #20]
505; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
506; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
507; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
508; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
509; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #19]
510; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
511; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
512; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
513; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
514; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #18]
515; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
516; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
517; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
518; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
519; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
520; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
521; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
522; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
523; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
524; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #16]
525; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
526; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
527; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
528; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
529; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
530; NONEON-NOSVE-NEXT:    add sp, sp, #96
531; NONEON-NOSVE-NEXT:    ret
532  %op1 = load <32 x i8>, ptr %a
533  %op2 = load <32 x i8>, ptr %b
534  %res = srem <32 x i8> %op1, %op2
535  store <32 x i8> %res, ptr %a
536  ret void
537}
538
; srem_v4i16: signed remainder of two <4 x i16> vectors passed and returned by value.
; Expected SVE2/SME code: widen both operands to .s lanes (sunpklo), divide with
; one predicated sdivr, narrow the quotient back to .h with uzp1 and form the
; remainder with mls under a vl4 halfword predicate.
; Expected scalar fallback (the run line with no -mattr): spill both operands to
; the stack and compute each of the four lanes with ldrsh + sdiv + msub + strh.
539define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
540; CHECK-LABEL: srem_v4i16:
541; CHECK:       // %bb.0:
542; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
543; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
544; CHECK-NEXT:    sunpklo z2.s, z1.h
545; CHECK-NEXT:    sunpklo z3.s, z0.h
546; CHECK-NEXT:    ptrue p0.s, vl4
547; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
548; CHECK-NEXT:    ptrue p0.h, vl4
549; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
550; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
551; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
552; CHECK-NEXT:    ret
553;
554; NONEON-NOSVE-LABEL: srem_v4i16:
555; NONEON-NOSVE:       // %bb.0:
556; NONEON-NOSVE-NEXT:    sub sp, sp, #32
557; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
558; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
559; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #22]
560; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
561; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
562; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
563; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
564; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
565; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
566; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
567; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
568; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
569; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
570; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #18]
571; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
572; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
573; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
574; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
575; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
576; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
577; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
578; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
579; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
580; NONEON-NOSVE-NEXT:    add sp, sp, #32
581; NONEON-NOSVE-NEXT:    ret
582  %res = srem <4 x i16> %op1, %op2
583  ret <4 x i16> %res
584}
585
; srem_v8i16: signed remainder of two <8 x i16> vectors passed and returned by value.
; Expected SVE2/SME code: the low four lanes are widened to .s with sunpklo and the
; high four are reached via ext #8 before widening; two predicated sdivr operations
; produce the quotients, which are narrowed with uzp1 and joined with splice, and
; the remainder is formed with mls under a vl8 halfword predicate.
; Expected scalar fallback (the run line with no -mattr): push both operands with
; a pre-indexed stp (48-byte frame) and compute each of the eight lanes with
; ldrsh + sdiv + msub + strh before reloading q0.
586define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
587; CHECK-LABEL: srem_v8i16:
588; CHECK:       // %bb.0:
589; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
590; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
591; CHECK-NEXT:    sunpklo z2.s, z1.h
592; CHECK-NEXT:    sunpklo z3.s, z0.h
593; CHECK-NEXT:    ptrue p0.s, vl4
594; CHECK-NEXT:    mov z4.d, z0.d
595; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
596; CHECK-NEXT:    mov z3.d, z1.d
597; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
598; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
599; CHECK-NEXT:    sunpklo z4.s, z4.h
600; CHECK-NEXT:    sunpklo z3.s, z3.h
601; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
602; CHECK-NEXT:    ptrue p0.h, vl4
603; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
604; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
605; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
606; CHECK-NEXT:    ptrue p0.h, vl8
607; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
608; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
609; CHECK-NEXT:    ret
610;
611; NONEON-NOSVE-LABEL: srem_v8i16:
612; NONEON-NOSVE:       // %bb.0:
613; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
614; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
615; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #30]
616; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
617; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
618; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
619; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
620; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
621; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #28]
622; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
623; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
624; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
625; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
626; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #26]
627; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
628; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
629; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
630; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
631; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #24]
632; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
633; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
634; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
635; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
636; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #22]
637; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
638; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
639; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
640; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
641; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
642; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
643; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
644; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
645; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
646; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #18]
647; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
648; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
649; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
650; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
651; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
652; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
653; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
654; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
655; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
656; NONEON-NOSVE-NEXT:    add sp, sp, #48
657; NONEON-NOSVE-NEXT:    ret
658  %res = srem <8 x i16> %op1, %op2
659  ret <8 x i16> %res
660}
661
662define void @srem_v16i16(ptr %a, ptr %b) {
663; CHECK-LABEL: srem_v16i16:
664; CHECK:       // %bb.0:
665; CHECK-NEXT:    ldp q4, q1, [x1]
666; CHECK-NEXT:    ptrue p0.s, vl4
667; CHECK-NEXT:    ldr q0, [x0, #16]
668; CHECK-NEXT:    sunpklo z2.s, z1.h
669; CHECK-NEXT:    sunpklo z3.s, z0.h
670; CHECK-NEXT:    sunpklo z5.s, z4.h
671; CHECK-NEXT:    mov z16.d, z0.d
672; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
673; CHECK-NEXT:    ldr q3, [x0]
674; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
675; CHECK-NEXT:    sunpklo z6.s, z3.h
676; CHECK-NEXT:    mov z7.d, z3.d
677; CHECK-NEXT:    sunpklo z16.s, z16.h
678; CHECK-NEXT:    ext z7.b, z7.b, z3.b, #8
679; CHECK-NEXT:    sunpklo z7.s, z7.h
680; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
681; CHECK-NEXT:    mov z6.d, z4.d
682; CHECK-NEXT:    ext z6.b, z6.b, z4.b, #8
683; CHECK-NEXT:    sunpklo z6.s, z6.h
684; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
685; CHECK-NEXT:    mov z7.d, z1.d
686; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
687; CHECK-NEXT:    sunpklo z7.s, z7.h
688; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
689; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
690; CHECK-NEXT:    ptrue p0.h, vl4
691; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
692; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
693; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
694; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
695; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
696; CHECK-NEXT:    ptrue p0.h, vl8
697; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
698; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
699; CHECK-NEXT:    stp q2, q0, [x0]
700; CHECK-NEXT:    ret
701;
702; NONEON-NOSVE-LABEL: srem_v16i16:
703; NONEON-NOSVE:       // %bb.0:
704; NONEON-NOSVE-NEXT:    sub sp, sp, #96
705; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
706; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
707; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
708; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
709; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
710; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #62]
711; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #46]
712; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
713; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
714; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #44]
715; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
716; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #60]
717; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
718; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
719; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #42]
720; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
721; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #58]
722; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
723; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
724; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #40]
725; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
726; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #56]
727; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
728; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
729; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #38]
730; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
731; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #54]
732; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
733; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
734; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #36]
735; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
736; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #52]
737; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
738; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
739; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #34]
740; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
741; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #50]
742; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
743; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
744; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #32]
745; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
746; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #48]
747; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
748; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
749; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
750; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
751; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #30]
752; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
753; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
754; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
755; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
756; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #28]
757; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
758; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
759; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
760; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
761; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #26]
762; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
763; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
764; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
765; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
766; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #24]
767; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
768; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
769; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
770; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
771; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #22]
772; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
773; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
774; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
775; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
776; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #20]
777; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
778; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
779; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
780; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
781; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #18]
782; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
783; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
784; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
785; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
786; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #16]
787; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
788; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
789; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
790; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
791; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
792; NONEON-NOSVE-NEXT:    add sp, sp, #96
793; NONEON-NOSVE-NEXT:    ret
794  %op1 = load <16 x i16>, ptr %a
795  %op2 = load <16 x i16>, ptr %b
796  %res = srem <16 x i16> %op1, %op2
797  store <16 x i16> %res, ptr %a
798  ret void
799}
800
; Signed remainder of two <2 x i32> vectors passed and returned by value.
; Expected output for the SVE/SME runs: a single predicated sdiv (ptrue vl2)
; followed by mls, i.e. op1 - (op1 / op2) * op2.
; Expected output for the run without SVE: both operands are stored to the
; stack and each lane is handled with a scalar sdiv + msub pair.
801define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
802; CHECK-LABEL: srem_v2i32:
803; CHECK:       // %bb.0:
804; CHECK-NEXT:    ptrue p0.s, vl2
805; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
806; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
807; CHECK-NEXT:    movprfx z2, z0
808; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
809; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
810; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
811; CHECK-NEXT:    ret
812;
813; NONEON-NOSVE-LABEL: srem_v2i32:
814; NONEON-NOSVE:       // %bb.0:
815; NONEON-NOSVE-NEXT:    sub sp, sp, #32
816; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
817; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
818; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
819; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
820; NONEON-NOSVE-NEXT:    sdiv w10, w11, w8
821; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
822; NONEON-NOSVE-NEXT:    str w8, [sp, #28]
823; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
824; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
825; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
826; NONEON-NOSVE-NEXT:    str w8, [sp, #24]
827; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
828; NONEON-NOSVE-NEXT:    add sp, sp, #32
829; NONEON-NOSVE-NEXT:    ret
830  %res = srem <2 x i32> %op1, %op2
831  ret <2 x i32> %res
832}
833
; Signed remainder of two <4 x i32> vectors passed and returned by value.
; Expected output for the SVE/SME runs: a single predicated sdiv (ptrue vl4)
; followed by mls, i.e. op1 - (op1 / op2) * op2.
; Expected output for the run without SVE: q0/q1 are stored to a 48-byte
; stack frame, each of the four lanes is computed with scalar sdiv + msub,
; and the result is reloaded into q0 from the stack.
834define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
835; CHECK-LABEL: srem_v4i32:
836; CHECK:       // %bb.0:
837; CHECK-NEXT:    ptrue p0.s, vl4
838; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
839; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
840; CHECK-NEXT:    movprfx z2, z0
841; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
842; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
843; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
844; CHECK-NEXT:    ret
845;
846; NONEON-NOSVE-LABEL: srem_v4i32:
847; NONEON-NOSVE:       // %bb.0:
848; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
849; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
850; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
851; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
852; NONEON-NOSVE-NEXT:    sdiv w10, w11, w8
853; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
854; NONEON-NOSVE-NEXT:    str w8, [sp, #44]
855; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
856; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
857; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
858; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
859; NONEON-NOSVE-NEXT:    ldr w9, [sp, #4]
860; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
861; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
862; NONEON-NOSVE-NEXT:    ldr w9, [sp]
863; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #36]
864; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
865; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
866; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
867; NONEON-NOSVE-NEXT:    str w8, [sp, #32]
868; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
869; NONEON-NOSVE-NEXT:    add sp, sp, #48
870; NONEON-NOSVE-NEXT:    ret
871  %res = srem <4 x i32> %op1, %op2
872  ret <4 x i32> %res
873}
874
; Signed remainder of two <8 x i32> vectors loaded from %a and %b; the result
; is stored back through %a.
; Expected output for the SVE/SME runs: the 256-bit operands are handled as
; two 128-bit halves (ldp/stp of q registers, ptrue vl4), each half using a
; predicated sdiv followed by msb or mls.
; Expected output for the run without SVE: both inputs are copied into a
; 96-byte stack frame, each of the eight lanes is computed with scalar
; sdiv + msub, and the result is assembled on the stack before the final stp.
875define void @srem_v8i32(ptr %a, ptr %b) {
876; CHECK-LABEL: srem_v8i32:
877; CHECK:       // %bb.0:
878; CHECK-NEXT:    ldp q0, q3, [x1]
879; CHECK-NEXT:    ptrue p0.s, vl4
880; CHECK-NEXT:    ldp q1, q2, [x0]
881; CHECK-NEXT:    movprfx z4, z1
882; CHECK-NEXT:    sdiv z4.s, p0/m, z4.s, z0.s
883; CHECK-NEXT:    movprfx z5, z2
884; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z3.s
885; CHECK-NEXT:    msb z0.s, p0/m, z4.s, z1.s
886; CHECK-NEXT:    movprfx z1, z2
887; CHECK-NEXT:    mls z1.s, p0/m, z5.s, z3.s
888; CHECK-NEXT:    stp q0, q1, [x0]
889; CHECK-NEXT:    ret
890;
891; NONEON-NOSVE-LABEL: srem_v8i32:
892; NONEON-NOSVE:       // %bb.0:
893; NONEON-NOSVE-NEXT:    sub sp, sp, #96
894; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
895; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
896; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
897; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
898; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
899; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #40]
900; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
901; NONEON-NOSVE-NEXT:    sdiv w10, w11, w8
902; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
903; NONEON-NOSVE-NEXT:    str w8, [sp, #92]
904; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
905; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
906; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
907; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
908; NONEON-NOSVE-NEXT:    ldr w9, [sp, #36]
909; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
910; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
911; NONEON-NOSVE-NEXT:    ldr w9, [sp, #32]
912; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #84]
913; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
914; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
915; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
916; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
917; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12]
918; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
919; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
920; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #76]
921; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
922; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #4]
923; NONEON-NOSVE-NEXT:    sdiv w10, w11, w8
924; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
925; NONEON-NOSVE-NEXT:    str w8, [sp, #72]
926; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
927; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
928; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
929; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
930; NONEON-NOSVE-NEXT:    ldr w9, [sp]
931; NONEON-NOSVE-NEXT:    sdiv w10, w9, w8
932; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
933; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
934; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
935; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
936; NONEON-NOSVE-NEXT:    add sp, sp, #96
937; NONEON-NOSVE-NEXT:    ret
938  %op1 = load <8 x i32>, ptr %a
939  %op2 = load <8 x i32>, ptr %b
940  %res = srem <8 x i32> %op1, %op2
941  store <8 x i32> %res, ptr %a
942  ret void
943}
944
; Signed remainder of two <1 x i64> vectors passed and returned by value.
; Expected output for the SVE/SME runs: a single predicated sdiv (ptrue vl1)
; followed by mls, i.e. op1 - (op1 / op2) * op2.
; Expected output for the run without SVE: both operands are moved to
; general-purpose registers with fmov, one scalar sdiv + msub produces the
; remainder, and the value is returned in d0 via a stack slot.
945define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
946; CHECK-LABEL: srem_v1i64:
947; CHECK:       // %bb.0:
948; CHECK-NEXT:    ptrue p0.d, vl1
949; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
950; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
951; CHECK-NEXT:    movprfx z2, z0
952; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
953; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
954; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
955; CHECK-NEXT:    ret
956;
957; NONEON-NOSVE-LABEL: srem_v1i64:
958; NONEON-NOSVE:       // %bb.0:
959; NONEON-NOSVE-NEXT:    sub sp, sp, #16
960; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
961; NONEON-NOSVE-NEXT:    fmov x8, d1
962; NONEON-NOSVE-NEXT:    fmov x9, d0
963; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
964; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
965; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
966; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
967; NONEON-NOSVE-NEXT:    add sp, sp, #16
968; NONEON-NOSVE-NEXT:    ret
969  %res = srem <1 x i64> %op1, %op2
970  ret <1 x i64> %res
971}
972
; Signed remainder of two <2 x i64> vectors passed and returned by value.
; Expected output for the SVE/SME runs: a single predicated sdiv (ptrue vl2)
; followed by mls, i.e. op1 - (op1 / op2) * op2.
; Expected output for the run without SVE: q0/q1 are stored to a 48-byte
; stack frame, both lanes are computed with scalar sdiv + msub, and the
; result is reloaded into q0 from the stack.
973define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
974; CHECK-LABEL: srem_v2i64:
975; CHECK:       // %bb.0:
976; CHECK-NEXT:    ptrue p0.d, vl2
977; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
978; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
979; CHECK-NEXT:    movprfx z2, z0
980; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
981; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
982; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
983; CHECK-NEXT:    ret
984;
985; NONEON-NOSVE-LABEL: srem_v2i64:
986; NONEON-NOSVE:       // %bb.0:
987; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
988; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
989; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp]
990; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
991; NONEON-NOSVE-NEXT:    sdiv x10, x11, x8
992; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
993; NONEON-NOSVE-NEXT:    str x8, [sp, #40]
994; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
995; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
996; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
997; NONEON-NOSVE-NEXT:    str x8, [sp, #32]
998; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
999; NONEON-NOSVE-NEXT:    add sp, sp, #48
1000; NONEON-NOSVE-NEXT:    ret
1001  %res = srem <2 x i64> %op1, %op2
1002  ret <2 x i64> %res
1003}
1004
; Signed remainder of two <4 x i64> vectors loaded from %a and %b; the result
; is stored back through %a.
; Expected output for the SVE/SME runs: the 256-bit operands are handled as
; two 128-bit halves (ldp/stp of q registers, ptrue vl2), each half using a
; predicated sdiv followed by msb or mls.
; Expected output for the run without SVE: both inputs are copied into a
; 96-byte stack frame, each of the four lanes is computed with scalar
; sdiv + msub, and the result is assembled on the stack before the final stp.
1005define void @srem_v4i64(ptr %a, ptr %b) {
1006; CHECK-LABEL: srem_v4i64:
1007; CHECK:       // %bb.0:
1008; CHECK-NEXT:    ldp q0, q3, [x1]
1009; CHECK-NEXT:    ptrue p0.d, vl2
1010; CHECK-NEXT:    ldp q1, q2, [x0]
1011; CHECK-NEXT:    movprfx z4, z1
1012; CHECK-NEXT:    sdiv z4.d, p0/m, z4.d, z0.d
1013; CHECK-NEXT:    movprfx z5, z2
1014; CHECK-NEXT:    sdiv z5.d, p0/m, z5.d, z3.d
1015; CHECK-NEXT:    msb z0.d, p0/m, z4.d, z1.d
1016; CHECK-NEXT:    movprfx z1, z2
1017; CHECK-NEXT:    mls z1.d, p0/m, z5.d, z3.d
1018; CHECK-NEXT:    stp q0, q1, [x0]
1019; CHECK-NEXT:    ret
1020;
1021; NONEON-NOSVE-LABEL: srem_v4i64:
1022; NONEON-NOSVE:       // %bb.0:
1023; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1024; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1025; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1026; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1027; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1028; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1029; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp, #32]
1030; NONEON-NOSVE-NEXT:    ldr x8, [sp, #56]
1031; NONEON-NOSVE-NEXT:    sdiv x10, x11, x8
1032; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
1033; NONEON-NOSVE-NEXT:    str x8, [sp, #88]
1034; NONEON-NOSVE-NEXT:    ldr x8, [sp, #48]
1035; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
1036; NONEON-NOSVE-NEXT:    msub x11, x10, x8, x9
1037; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
1038; NONEON-NOSVE-NEXT:    ldr x9, [sp, #8]
1039; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
1040; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
1041; NONEON-NOSVE-NEXT:    ldr x9, [sp]
1042; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #72]
1043; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
1044; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
1045; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
1046; NONEON-NOSVE-NEXT:    str x8, [sp, #64]
1047; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1048; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1049; NONEON-NOSVE-NEXT:    add sp, sp, #96
1050; NONEON-NOSVE-NEXT:    ret
1051  %op1 = load <4 x i64>, ptr %a
1052  %op2 = load <4 x i64>, ptr %b
1053  %res = srem <4 x i64> %op1, %op2
1054  store <4 x i64> %res, ptr %a
1055  ret void
1056}
1057
1058;
1059; UREM
1060;
1061
; Unsigned remainder of two <4 x i8> vectors passed and returned by value.
; Expected output for the SVE/SME runs: both operands are masked to their low
; 8 bits in 16-bit lanes (and #0xff), widened to 32-bit lanes with uunpklo,
; divided with a predicated udivr, narrowed back to 16-bit lanes with uzp1,
; and the remainder is formed with mls on 16-bit lanes.
; Expected output for the run without SVE: the operands are stored to the
; stack, each element is read with ldrb at a 2-byte stride, reduced with
; scalar udiv + msub, and written back with strh before reloading d0.
1062define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
1063; CHECK-LABEL: urem_v4i8:
1064; CHECK:       // %bb.0:
1065; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1066; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1067; CHECK-NEXT:    ptrue p0.s, vl4
1068; CHECK-NEXT:    and z0.h, z0.h, #0xff
1069; CHECK-NEXT:    and z1.h, z1.h, #0xff
1070; CHECK-NEXT:    uunpklo z2.s, z1.h
1071; CHECK-NEXT:    uunpklo z3.s, z0.h
1072; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1073; CHECK-NEXT:    ptrue p0.h, vl4
1074; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
1075; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
1076; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1077; CHECK-NEXT:    ret
1078;
1079; NONEON-NOSVE-LABEL: urem_v4i8:
1080; NONEON-NOSVE:       // %bb.0:
1081; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1082; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1083; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1084; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
1085; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
1086; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #20]
1087; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #12]
1088; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #18]
1089; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #10]
1090; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1091; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #16]
1092; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #8]
1093; NONEON-NOSVE-NEXT:    udiv w13, w12, w11
1094; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1095; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
1096; NONEON-NOSVE-NEXT:    udiv w16, w15, w14
1097; NONEON-NOSVE-NEXT:    msub w9, w13, w11, w12
1098; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
1099; NONEON-NOSVE-NEXT:    udiv w0, w18, w17
1100; NONEON-NOSVE-NEXT:    msub w10, w16, w14, w15
1101; NONEON-NOSVE-NEXT:    strh w10, [sp, #26]
1102; NONEON-NOSVE-NEXT:    msub w8, w0, w17, w18
1103; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
1104; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1105; NONEON-NOSVE-NEXT:    add sp, sp, #32
1106; NONEON-NOSVE-NEXT:    ret
1107  %res = urem <4 x i8> %op1, %op2
1108  ret <4 x i8> %res
1109}
1110
; Unsigned remainder of two <8 x i8> vectors passed and returned by value.
; Expected output for the SVE/SME runs: the operands are widened from 8-bit
; to 16-bit to 32-bit lanes with uunpklo (using ext #8 to reach the upper
; half), divided with two predicated udivr operations on 32-bit lanes,
; narrowed back with uzp1 and splice, and the remainder is formed with a
; single mls on 8-bit lanes (ptrue vl8).
; Expected output for the run without SVE: the operands are stored to the
; stack and each of the eight bytes goes through ldrb, udiv, msub and strb.
1111define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
1112; CHECK-LABEL: urem_v8i8:
1113; CHECK:       // %bb.0:
1114; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1115; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1116; CHECK-NEXT:    uunpklo z2.h, z1.b
1117; CHECK-NEXT:    uunpklo z3.h, z0.b
1118; CHECK-NEXT:    ptrue p0.s, vl4
1119; CHECK-NEXT:    uunpklo z4.s, z2.h
1120; CHECK-NEXT:    uunpklo z5.s, z3.h
1121; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
1122; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
1123; CHECK-NEXT:    uunpklo z2.s, z2.h
1124; CHECK-NEXT:    uunpklo z3.s, z3.h
1125; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
1126; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1127; CHECK-NEXT:    ptrue p0.h, vl4
1128; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
1129; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
1130; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
1131; CHECK-NEXT:    ptrue p0.b, vl8
1132; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
1133; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
1134; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1135; CHECK-NEXT:    ret
1136;
1137; NONEON-NOSVE-LABEL: urem_v8i8:
1138; NONEON-NOSVE:       // %bb.0:
1139; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1140; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1141; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1142; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
1143; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
1144; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1145; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1146; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
1147; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
1148; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
1149; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1150; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1151; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
1152; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
1153; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
1154; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1155; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1156; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
1157; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
1158; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
1159; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1160; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1161; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
1162; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
1163; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
1164; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1165; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1166; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
1167; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
1168; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
1169; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1170; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1171; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
1172; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
1173; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
1174; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1175; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1176; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
1177; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
1178; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
1179; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1180; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1181; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
1182; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1183; NONEON-NOSVE-NEXT:    add sp, sp, #32
1184; NONEON-NOSVE-NEXT:    ret
1185  %res = urem <8 x i8> %op1, %op2
1186  ret <8 x i8> %res
1187}
1188
; Unsigned remainder of two <16 x i8> vectors passed and returned by value.
; Expected output for the SVE/SME runs: the operands are widened from 8-bit
; to 32-bit lanes in four groups (uunpklo plus ext #8), divided with four
; predicated udivr operations on 32-bit lanes, narrowed back with uzp1 and
; splice, and the remainder is formed with a single mls on 8-bit lanes
; (ptrue vl16).
; Expected output for the run without SVE: q0/q1 are stored to a 48-byte
; stack frame and each of the sixteen bytes goes through ldrb, udiv, msub
; and strb before q0 is reloaded from the stack.
1189define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
1190; CHECK-LABEL: urem_v16i8:
1191; CHECK:       // %bb.0:
1192; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1193; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1194; CHECK-NEXT:    uunpklo z2.h, z1.b
1195; CHECK-NEXT:    uunpklo z3.h, z0.b
1196; CHECK-NEXT:    ptrue p0.s, vl4
1197; CHECK-NEXT:    uunpklo z4.s, z2.h
1198; CHECK-NEXT:    uunpklo z5.s, z3.h
1199; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
1200; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
1201; CHECK-NEXT:    uunpklo z2.s, z2.h
1202; CHECK-NEXT:    uunpklo z3.s, z3.h
1203; CHECK-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
1204; CHECK-NEXT:    mov z5.d, z0.d
1205; CHECK-NEXT:    ext z5.b, z5.b, z0.b, #8
1206; CHECK-NEXT:    uunpklo z5.h, z5.b
1207; CHECK-NEXT:    uunpklo z7.s, z5.h
1208; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
1209; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1210; CHECK-NEXT:    mov z3.d, z1.d
1211; CHECK-NEXT:    uunpklo z5.s, z5.h
1212; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
1213; CHECK-NEXT:    uunpklo z3.h, z3.b
1214; CHECK-NEXT:    uunpklo z6.s, z3.h
1215; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
1216; CHECK-NEXT:    uunpklo z3.s, z3.h
1217; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
1218; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
1219; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
1220; CHECK-NEXT:    ptrue p0.h, vl4
1221; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
1222; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
1223; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
1224; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
1225; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
1226; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
1227; CHECK-NEXT:    ptrue p0.b, vl8
1228; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
1229; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
1230; CHECK-NEXT:    ptrue p0.b, vl16
1231; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
1232; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1233; CHECK-NEXT:    ret
1234;
1235; NONEON-NOSVE-LABEL: urem_v16i8:
1236; NONEON-NOSVE:       // %bb.0:
1237; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
1238; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1239; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
1240; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
1241; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1242; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1243; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
1244; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
1245; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
1246; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1247; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1248; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
1249; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
1250; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
1251; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1252; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1253; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
1254; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
1255; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
1256; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1257; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1258; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
1259; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
1260; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
1261; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1262; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1263; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
1264; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
1265; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
1266; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1267; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1268; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
1269; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
1270; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
1271; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1272; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1273; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
1274; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
1275; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
1276; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1277; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1278; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
1279; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
1280; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
1281; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1282; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1283; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
1284; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
1285; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
1286; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1287; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1288; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
1289; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
1290; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
1291; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1292; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1293; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
1294; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
1295; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
1296; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1297; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1298; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
1299; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
1300; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
1301; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1302; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1303; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
1304; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
1305; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
1306; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1307; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1308; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
1309; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
1310; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
1311; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1312; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1313; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
1314; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
1315; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
1316; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1317; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1318; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
1319; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1320; NONEON-NOSVE-NEXT:    add sp, sp, #48
1321; NONEON-NOSVE-NEXT:    ret
1322  %res = urem <16 x i8> %op1, %op2
1323  ret <16 x i8> %res
1324}
1325
1326define void @urem_v32i8(ptr %a, ptr %b) {
1327; CHECK-LABEL: urem_v32i8:
1328; CHECK:       // %bb.0:
1329; CHECK-NEXT:    ldr q0, [x0, #16]
1330; CHECK-NEXT:    ldr q1, [x1, #16]
1331; CHECK-NEXT:    ptrue p0.s, vl4
1332; CHECK-NEXT:    uunpklo z3.h, z1.b
1333; CHECK-NEXT:    uunpklo z4.h, z0.b
1334; CHECK-NEXT:    uunpklo z2.s, z3.h
1335; CHECK-NEXT:    uunpklo z5.s, z4.h
1336; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
1337; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
1338; CHECK-NEXT:    uunpklo z3.s, z3.h
1339; CHECK-NEXT:    uunpklo z4.s, z4.h
1340; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z5.s
1341; CHECK-NEXT:    movprfx z5, z4
1342; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z3.s
1343; CHECK-NEXT:    mov z3.d, z1.d
1344; CHECK-NEXT:    mov z4.d, z0.d
1345; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
1346; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
1347; CHECK-NEXT:    uunpklo z7.h, z3.b
1348; CHECK-NEXT:    uunpklo z16.h, z4.b
1349; CHECK-NEXT:    uunpklo z3.s, z7.h
1350; CHECK-NEXT:    uunpklo z4.s, z16.h
1351; CHECK-NEXT:    ext z7.b, z7.b, z7.b, #8
1352; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
1353; CHECK-NEXT:    uunpklo z7.s, z7.h
1354; CHECK-NEXT:    movprfx z6, z4
1355; CHECK-NEXT:    udiv z6.s, p0/m, z6.s, z3.s
1356; CHECK-NEXT:    ldr q3, [x0]
1357; CHECK-NEXT:    ldr q4, [x1]
1358; CHECK-NEXT:    uunpklo z16.s, z16.h
1359; CHECK-NEXT:    uunpklo z17.h, z4.b
1360; CHECK-NEXT:    uunpklo z18.h, z3.b
1361; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
1362; CHECK-NEXT:    uunpklo z19.s, z17.h
1363; CHECK-NEXT:    uunpklo z20.s, z18.h
1364; CHECK-NEXT:    ext z17.b, z17.b, z17.b, #8
1365; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
1366; CHECK-NEXT:    uunpklo z17.s, z17.h
1367; CHECK-NEXT:    uunpklo z18.s, z18.h
1368; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
1369; CHECK-NEXT:    mov z20.d, z3.d
1370; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
1371; CHECK-NEXT:    uunpklo z20.h, z20.b
1372; CHECK-NEXT:    uunpklo z22.s, z20.h
1373; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
1374; CHECK-NEXT:    udivr z17.s, p0/m, z17.s, z18.s
1375; CHECK-NEXT:    mov z18.d, z4.d
1376; CHECK-NEXT:    uunpklo z20.s, z20.h
1377; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
1378; CHECK-NEXT:    uunpklo z18.h, z18.b
1379; CHECK-NEXT:    uunpklo z21.s, z18.h
1380; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
1381; CHECK-NEXT:    uunpklo z18.s, z18.h
1382; CHECK-NEXT:    udivr z21.s, p0/m, z21.s, z22.s
1383; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
1384; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
1385; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
1386; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
1387; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z20.s
1388; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
1389; CHECK-NEXT:    ptrue p0.h, vl4
1390; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
1391; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
1392; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
1393; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
1394; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
1395; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
1396; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
1397; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
1398; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
1399; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
1400; CHECK-NEXT:    ptrue p0.b, vl8
1401; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
1402; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
1403; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
1404; CHECK-NEXT:    ptrue p0.b, vl16
1405; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
1406; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
1407; CHECK-NEXT:    stp q2, q0, [x0]
1408; CHECK-NEXT:    ret
1409;
1410; NONEON-NOSVE-LABEL: urem_v32i8:
1411; NONEON-NOSVE:       // %bb.0:
1412; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1413; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1414; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1415; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1416; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1417; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1418; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
1419; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
1420; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1421; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1422; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
1423; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
1424; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
1425; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1426; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1427; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
1428; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
1429; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
1430; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1431; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1432; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
1433; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
1434; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
1435; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1436; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1437; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
1438; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
1439; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
1440; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1441; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1442; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
1443; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
1444; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
1445; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1446; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1447; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
1448; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
1449; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
1450; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1451; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1452; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
1453; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
1454; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
1455; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1456; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1457; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
1458; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
1459; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
1460; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1461; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1462; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
1463; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
1464; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
1465; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1466; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1467; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
1468; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
1469; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
1470; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1471; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1472; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
1473; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
1474; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
1475; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1476; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1477; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
1478; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
1479; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
1480; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1481; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1482; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
1483; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
1484; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
1485; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1486; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1487; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
1488; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
1489; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
1490; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1491; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1492; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
1493; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
1494; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
1495; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1496; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1497; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
1498; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
1499; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
1500; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1501; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1502; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
1503; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
1504; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
1505; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1506; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1507; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
1508; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
1509; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
1510; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1511; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1512; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
1513; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
1514; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
1515; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1516; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1517; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
1518; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
1519; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
1520; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1521; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1522; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
1523; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
1524; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
1525; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1526; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1527; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
1528; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
1529; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
1530; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1531; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1532; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
1533; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
1534; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
1535; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1536; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1537; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
1538; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
1539; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
1540; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1541; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1542; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
1543; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
1544; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
1545; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1546; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1547; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
1548; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
1549; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
1550; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1551; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1552; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
1553; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
1554; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
1555; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1556; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1557; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
1558; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
1559; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
1560; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1561; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1562; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
1563; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
1564; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
1565; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1566; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1567; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
1568; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
1569; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
1570; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1571; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1572; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
1573; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
1574; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
1575; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1576; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1577; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
1578; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1579; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1580; NONEON-NOSVE-NEXT:    add sp, sp, #96
1581; NONEON-NOSVE-NEXT:    ret
1582  %op1 = load <32 x i8>, ptr %a
1583  %op2 = load <32 x i8>, ptr %b
1584  %res = urem <32 x i8> %op1, %op2
1585  store <32 x i8> %res, ptr %a
1586  ret void
1587}
1588
; urem of two <4 x i16> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - both operands are unpacked to
; 32-bit lanes (uunpklo), divided there (udivr under a vl4 .s predicate), the
; quotient is narrowed back to 16-bit lanes (uzp1) and the remainder is formed
; as op1 - quotient * op2 with one mls.
; Expected output for the run line without vector features - both operands are
; spilled to the stack and each of the four lanes is computed with a scalar
; udiv + msub pair on halfword loads/stores.
1589define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
1590; CHECK-LABEL: urem_v4i16:
1591; CHECK:       // %bb.0:
1592; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1593; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1594; CHECK-NEXT:    uunpklo z2.s, z1.h
1595; CHECK-NEXT:    uunpklo z3.s, z0.h
1596; CHECK-NEXT:    ptrue p0.s, vl4
1597; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1598; CHECK-NEXT:    ptrue p0.h, vl4
1599; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
1600; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
1601; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1602; CHECK-NEXT:    ret
1603;
1604; NONEON-NOSVE-LABEL: urem_v4i16:
1605; NONEON-NOSVE:       // %bb.0:
1606; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1607; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1608; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1609; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
1610; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
1611; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1612; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1613; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
1614; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
1615; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
1616; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1617; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1618; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
1619; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
1620; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
1621; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1622; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1623; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
1624; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
1625; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
1626; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1627; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1628; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
1629; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1630; NONEON-NOSVE-NEXT:    add sp, sp, #32
1631; NONEON-NOSVE-NEXT:    ret
1632  %res = urem <4 x i16> %op1, %op2
1633  ret <4 x i16> %res
1634}
1635
; urem of two <8 x i16> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - the operands are handled as two
; halves (the upper half is brought down with ext #8), each half is unpacked to
; 32-bit lanes and divided with udivr under a vl4 predicate. The two quotients
; are narrowed with uzp1, joined with splice, and a single mls under a vl8 .h
; predicate produces op1 - quotient * op2.
; Expected output for the run line without vector features - both operands are
; stored to the stack and all eight lanes are computed with scalar udiv + msub
; on halfword loads/stores.
1636define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
1637; CHECK-LABEL: urem_v8i16:
1638; CHECK:       // %bb.0:
1639; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1640; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1641; CHECK-NEXT:    uunpklo z2.s, z1.h
1642; CHECK-NEXT:    uunpklo z3.s, z0.h
1643; CHECK-NEXT:    ptrue p0.s, vl4
1644; CHECK-NEXT:    mov z4.d, z0.d
1645; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1646; CHECK-NEXT:    mov z3.d, z1.d
1647; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
1648; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
1649; CHECK-NEXT:    uunpklo z4.s, z4.h
1650; CHECK-NEXT:    uunpklo z3.s, z3.h
1651; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
1652; CHECK-NEXT:    ptrue p0.h, vl4
1653; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
1654; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
1655; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
1656; CHECK-NEXT:    ptrue p0.h, vl8
1657; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
1658; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1659; CHECK-NEXT:    ret
1660;
1661; NONEON-NOSVE-LABEL: urem_v8i16:
1662; NONEON-NOSVE:       // %bb.0:
1663; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
1664; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1665; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
1666; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
1667; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1668; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1669; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
1670; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
1671; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
1672; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1673; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1674; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
1675; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
1676; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
1677; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1678; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1679; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
1680; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
1681; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
1682; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1683; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1684; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
1685; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
1686; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
1687; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1688; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1689; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
1690; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
1691; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
1692; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1693; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1694; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
1695; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
1696; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
1697; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1698; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1699; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
1700; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
1701; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
1702; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1703; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1704; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
1705; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1706; NONEON-NOSVE-NEXT:    add sp, sp, #48
1707; NONEON-NOSVE-NEXT:    ret
1708  %res = urem <8 x i16> %op1, %op2
1709  ret <8 x i16> %res
1710}
1711
; urem of two <16 x i16> vectors held in memory. %a is the dividend and also
; receives the result, %b is the divisor.
; Expected output for the sve2/sme run lines - each operand is loaded as two
; 128-bit registers, every 8-lane register is split into halves that are
; unpacked to 32-bit lanes and divided with udivr (four divides in total). The
; quotients are narrowed with uzp1 and re-joined with splice, then the
; remainders are formed with msb for the first register and mls for the second
; before both are stored back through x0.
; Expected output for the run line without vector features - all four input
; registers are copied to the stack, sixteen scalar udiv + msub pairs build the
; result at sp+64, and that result is reloaded and stored through x0.
1712define void @urem_v16i16(ptr %a, ptr %b) {
1713; CHECK-LABEL: urem_v16i16:
1714; CHECK:       // %bb.0:
1715; CHECK-NEXT:    ldp q4, q1, [x1]
1716; CHECK-NEXT:    ptrue p0.s, vl4
1717; CHECK-NEXT:    ldr q0, [x0, #16]
1718; CHECK-NEXT:    uunpklo z2.s, z1.h
1719; CHECK-NEXT:    uunpklo z3.s, z0.h
1720; CHECK-NEXT:    uunpklo z5.s, z4.h
1721; CHECK-NEXT:    mov z16.d, z0.d
1722; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
1723; CHECK-NEXT:    ldr q3, [x0]
1724; CHECK-NEXT:    ext z16.b, z16.b, z0.b, #8
1725; CHECK-NEXT:    uunpklo z6.s, z3.h
1726; CHECK-NEXT:    mov z7.d, z3.d
1727; CHECK-NEXT:    uunpklo z16.s, z16.h
1728; CHECK-NEXT:    ext z7.b, z7.b, z3.b, #8
1729; CHECK-NEXT:    uunpklo z7.s, z7.h
1730; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
1731; CHECK-NEXT:    mov z6.d, z4.d
1732; CHECK-NEXT:    ext z6.b, z6.b, z4.b, #8
1733; CHECK-NEXT:    uunpklo z6.s, z6.h
1734; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
1735; CHECK-NEXT:    mov z7.d, z1.d
1736; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
1737; CHECK-NEXT:    uunpklo z7.s, z7.h
1738; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
1739; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
1740; CHECK-NEXT:    ptrue p0.h, vl4
1741; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
1742; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
1743; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
1744; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
1745; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
1746; CHECK-NEXT:    ptrue p0.h, vl8
1747; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
1748; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
1749; CHECK-NEXT:    stp q2, q0, [x0]
1750; CHECK-NEXT:    ret
1751;
1752; NONEON-NOSVE-LABEL: urem_v16i16:
1753; NONEON-NOSVE:       // %bb.0:
1754; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1755; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1756; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1757; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1758; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1759; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1760; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
1761; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
1762; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1763; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1764; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
1765; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
1766; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
1767; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1768; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1769; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
1770; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
1771; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
1772; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1773; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1774; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
1775; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
1776; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
1777; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1778; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1779; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
1780; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
1781; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
1782; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1783; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1784; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
1785; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
1786; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
1787; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1788; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1789; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
1790; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
1791; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
1792; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1793; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1794; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
1795; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
1796; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
1797; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1798; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1799; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
1800; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
1801; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
1802; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1803; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1804; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
1805; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
1806; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
1807; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1808; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1809; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
1810; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
1811; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
1812; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1813; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1814; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
1815; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
1816; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
1817; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1818; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1819; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
1820; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
1821; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
1822; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1823; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1824; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
1825; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
1826; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
1827; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1828; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1829; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
1830; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
1831; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
1832; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1833; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1834; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
1835; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
1836; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
1837; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1838; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1839; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
1840; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1841; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1842; NONEON-NOSVE-NEXT:    add sp, sp, #96
1843; NONEON-NOSVE-NEXT:    ret
1844  %op1 = load <16 x i16>, ptr %a
1845  %op2 = load <16 x i16>, ptr %b
1846  %res = urem <16 x i16> %op1, %op2
1847  store <16 x i16> %res, ptr %a
1848  ret void
1849}
1850
; urem of two <2 x i32> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - the divide is done directly on
; 32-bit lanes under a vl2 predicate (movprfx copies op1 into z2 so the udiv
; does not overwrite it), followed by one mls giving op1 - quotient * op2.
; Expected output for the run line without vector features - both operands are
; spilled to the stack and the two lanes are computed with scalar 32-bit
; udiv + msub.
1851define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
1852; CHECK-LABEL: urem_v2i32:
1853; CHECK:       // %bb.0:
1854; CHECK-NEXT:    ptrue p0.s, vl2
1855; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1856; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1857; CHECK-NEXT:    movprfx z2, z0
1858; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
1859; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
1860; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1861; CHECK-NEXT:    ret
1862;
1863; NONEON-NOSVE-LABEL: urem_v2i32:
1864; NONEON-NOSVE:       // %bb.0:
1865; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1866; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1867; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1868; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
1869; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
1870; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
1871; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
1872; NONEON-NOSVE-NEXT:    str w8, [sp, #28]
1873; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
1874; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1875; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1876; NONEON-NOSVE-NEXT:    str w8, [sp, #24]
1877; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1878; NONEON-NOSVE-NEXT:    add sp, sp, #32
1879; NONEON-NOSVE-NEXT:    ret
1880  %res = urem <2 x i32> %op1, %op2
1881  ret <2 x i32> %res
1882}
1883
; urem of two <4 x i32> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - one udiv on 32-bit lanes under a
; vl4 predicate (with movprfx preserving op1) followed by one mls giving
; op1 - quotient * op2.
; Expected output for the run line without vector features - both operands are
; stored to the stack and the four lanes are computed with scalar 32-bit
; udiv + msub, with two of the results written back as a pair (stp).
1884define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
1885; CHECK-LABEL: urem_v4i32:
1886; CHECK:       // %bb.0:
1887; CHECK-NEXT:    ptrue p0.s, vl4
1888; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1889; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1890; CHECK-NEXT:    movprfx z2, z0
1891; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
1892; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
1893; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1894; CHECK-NEXT:    ret
1895;
1896; NONEON-NOSVE-LABEL: urem_v4i32:
1897; NONEON-NOSVE:       // %bb.0:
1898; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
1899; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1900; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #8]
1901; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
1902; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
1903; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
1904; NONEON-NOSVE-NEXT:    str w8, [sp, #44]
1905; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
1906; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1907; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
1908; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
1909; NONEON-NOSVE-NEXT:    ldr w9, [sp, #4]
1910; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1911; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1912; NONEON-NOSVE-NEXT:    ldr w9, [sp]
1913; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #36]
1914; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
1915; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1916; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1917; NONEON-NOSVE-NEXT:    str w8, [sp, #32]
1918; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1919; NONEON-NOSVE-NEXT:    add sp, sp, #48
1920; NONEON-NOSVE-NEXT:    ret
1921  %res = urem <4 x i32> %op1, %op2
1922  ret <4 x i32> %res
1923}
1924
; urem of two <8 x i32> vectors held in memory. %a is the dividend and also
; receives the result, %b is the divisor.
; Expected output for the sve2/sme run lines - each operand is loaded as two
; 128-bit registers, each pair is divided with udiv on 32-bit lanes under a vl4
; predicate, and the remainders are formed with msb for the first register and
; movprfx + mls for the second before both are stored back through x0.
; Expected output for the run line without vector features - all four input
; registers are copied to the stack, eight scalar 32-bit udiv + msub pairs
; build the result at sp+64, and that result is reloaded and stored through x0.
1925define void @urem_v8i32(ptr %a, ptr %b) {
1926; CHECK-LABEL: urem_v8i32:
1927; CHECK:       // %bb.0:
1928; CHECK-NEXT:    ldp q0, q3, [x1]
1929; CHECK-NEXT:    ptrue p0.s, vl4
1930; CHECK-NEXT:    ldp q1, q2, [x0]
1931; CHECK-NEXT:    movprfx z4, z1
1932; CHECK-NEXT:    udiv z4.s, p0/m, z4.s, z0.s
1933; CHECK-NEXT:    movprfx z5, z2
1934; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z3.s
1935; CHECK-NEXT:    msb z0.s, p0/m, z4.s, z1.s
1936; CHECK-NEXT:    movprfx z1, z2
1937; CHECK-NEXT:    mls z1.s, p0/m, z5.s, z3.s
1938; CHECK-NEXT:    stp q0, q1, [x0]
1939; CHECK-NEXT:    ret
1940;
1941; NONEON-NOSVE-LABEL: urem_v8i32:
1942; NONEON-NOSVE:       // %bb.0:
1943; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1944; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1945; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1946; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1947; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1948; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1949; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #40]
1950; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
1951; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
1952; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
1953; NONEON-NOSVE-NEXT:    str w8, [sp, #92]
1954; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
1955; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1956; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
1957; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
1958; NONEON-NOSVE-NEXT:    ldr w9, [sp, #36]
1959; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1960; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1961; NONEON-NOSVE-NEXT:    ldr w9, [sp, #32]
1962; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #84]
1963; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
1964; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1965; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
1966; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
1967; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12]
1968; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1969; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1970; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #76]
1971; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
1972; NONEON-NOSVE-NEXT:    ldp w9, w11, [sp, #4]
1973; NONEON-NOSVE-NEXT:    udiv w10, w11, w8
1974; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w11
1975; NONEON-NOSVE-NEXT:    str w8, [sp, #72]
1976; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
1977; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1978; NONEON-NOSVE-NEXT:    msub w11, w10, w8, w9
1979; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
1980; NONEON-NOSVE-NEXT:    ldr w9, [sp]
1981; NONEON-NOSVE-NEXT:    udiv w10, w9, w8
1982; NONEON-NOSVE-NEXT:    msub w8, w10, w8, w9
1983; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
1984; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1985; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1986; NONEON-NOSVE-NEXT:    add sp, sp, #96
1987; NONEON-NOSVE-NEXT:    ret
1988  %op1 = load <8 x i32>, ptr %a
1989  %op2 = load <8 x i32>, ptr %b
1990  %res = urem <8 x i32> %op1, %op2
1991  store <8 x i32> %res, ptr %a
1992  ret void
1993}
1994
; urem of two <1 x i64> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - one udiv on 64-bit lanes under a
; vl1 predicate (with movprfx preserving op1) followed by one mls giving
; op1 - quotient * op2.
; Expected output for the run line without vector features - both operands are
; moved to general-purpose registers with fmov, a single 64-bit udiv + msub
; computes the remainder, and the value is returned in d0 via a stack slot.
1995define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
1996; CHECK-LABEL: urem_v1i64:
1997; CHECK:       // %bb.0:
1998; CHECK-NEXT:    ptrue p0.d, vl1
1999; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
2000; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2001; CHECK-NEXT:    movprfx z2, z0
2002; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
2003; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
2004; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2005; CHECK-NEXT:    ret
2006;
2007; NONEON-NOSVE-LABEL: urem_v1i64:
2008; NONEON-NOSVE:       // %bb.0:
2009; NONEON-NOSVE-NEXT:    sub sp, sp, #16
2010; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2011; NONEON-NOSVE-NEXT:    fmov x8, d1
2012; NONEON-NOSVE-NEXT:    fmov x9, d0
2013; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
2014; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
2015; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
2016; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2017; NONEON-NOSVE-NEXT:    add sp, sp, #16
2018; NONEON-NOSVE-NEXT:    ret
2019  %res = urem <1 x i64> %op1, %op2
2020  ret <1 x i64> %res
2021}
2022
; urem of two <2 x i64> vectors passed and returned by value.
; Expected output for the sve2/sme run lines - one udiv on 64-bit lanes under a
; vl2 predicate (with movprfx preserving op1) followed by one mls giving
; op1 - quotient * op2.
; Expected output for the run line without vector features - both operands are
; stored to the stack and the two lanes are computed with scalar 64-bit
; udiv + msub, then the result is reloaded into q0.
2023define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
2024; CHECK-LABEL: urem_v2i64:
2025; CHECK:       // %bb.0:
2026; CHECK-NEXT:    ptrue p0.d, vl2
2027; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
2028; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2029; CHECK-NEXT:    movprfx z2, z0
2030; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
2031; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
2032; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2033; CHECK-NEXT:    ret
2034;
2035; NONEON-NOSVE-LABEL: urem_v2i64:
2036; NONEON-NOSVE:       // %bb.0:
2037; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
2038; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
2039; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp]
2040; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
2041; NONEON-NOSVE-NEXT:    udiv x10, x11, x8
2042; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
2043; NONEON-NOSVE-NEXT:    str x8, [sp, #40]
2044; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
2045; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
2046; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
2047; NONEON-NOSVE-NEXT:    str x8, [sp, #32]
2048; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
2049; NONEON-NOSVE-NEXT:    add sp, sp, #48
2050; NONEON-NOSVE-NEXT:    ret
2051  %res = urem <2 x i64> %op1, %op2
2052  ret <2 x i64> %res
2053}
2054
; urem of two <4 x i64> vectors held in memory. %a is the dividend and also
; receives the result, %b is the divisor.
; Expected output for the sve2/sme run lines - each operand is loaded as two
; 128-bit registers, each pair is divided with udiv on 64-bit lanes under a vl2
; predicate, and the remainders are formed with msb for the first register and
; movprfx + mls for the second before both are stored back through x0.
; Expected output for the run line without vector features - all four input
; registers are copied to the stack, four scalar 64-bit udiv + msub pairs build
; the result at sp+64, and that result is reloaded and stored through x0.
2055define void @urem_v4i64(ptr %a, ptr %b) {
2056; CHECK-LABEL: urem_v4i64:
2057; CHECK:       // %bb.0:
2058; CHECK-NEXT:    ldp q0, q3, [x1]
2059; CHECK-NEXT:    ptrue p0.d, vl2
2060; CHECK-NEXT:    ldp q1, q2, [x0]
2061; CHECK-NEXT:    movprfx z4, z1
2062; CHECK-NEXT:    udiv z4.d, p0/m, z4.d, z0.d
2063; CHECK-NEXT:    movprfx z5, z2
2064; CHECK-NEXT:    udiv z5.d, p0/m, z5.d, z3.d
2065; CHECK-NEXT:    msb z0.d, p0/m, z4.d, z1.d
2066; CHECK-NEXT:    movprfx z1, z2
2067; CHECK-NEXT:    mls z1.d, p0/m, z5.d, z3.d
2068; CHECK-NEXT:    stp q0, q1, [x0]
2069; CHECK-NEXT:    ret
2070;
2071; NONEON-NOSVE-LABEL: urem_v4i64:
2072; NONEON-NOSVE:       // %bb.0:
2073; NONEON-NOSVE-NEXT:    sub sp, sp, #96
2074; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
2075; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
2076; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
2077; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
2078; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
2079; NONEON-NOSVE-NEXT:    ldp x9, x11, [sp, #32]
2080; NONEON-NOSVE-NEXT:    ldr x8, [sp, #56]
2081; NONEON-NOSVE-NEXT:    udiv x10, x11, x8
2082; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x11
2083; NONEON-NOSVE-NEXT:    str x8, [sp, #88]
2084; NONEON-NOSVE-NEXT:    ldr x8, [sp, #48]
2085; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
2086; NONEON-NOSVE-NEXT:    msub x11, x10, x8, x9
2087; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
2088; NONEON-NOSVE-NEXT:    ldr x9, [sp, #8]
2089; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
2090; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
2091; NONEON-NOSVE-NEXT:    ldr x9, [sp]
2092; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #72]
2093; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
2094; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
2095; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
2096; NONEON-NOSVE-NEXT:    str x8, [sp, #64]
2097; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
2098; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2099; NONEON-NOSVE-NEXT:    add sp, sp, #96
2100; NONEON-NOSVE-NEXT:    ret
2101  %op1 = load <4 x i64>, ptr %a
2102  %op2 = load <4 x i64>, ptr %b
2103  %res = urem <4 x i64> %op1, %op2
2104  store <4 x i64> %res, ptr %a
2105  ret void
2106}
2107