; xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll (revision a574ef61766d49db4350b6f06a108f36bccb25bb)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has its own copy of this because of the intrinsics

; Integer multiply of <8 x i8>: the checks expect a single NEON `mul` on the
; .8b arrangement, operating directly on the argument registers v0/v1.
define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: mul8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

; Integer multiply of <16 x i8>: the checks expect a single NEON `mul` on the
; .16b arrangement.
define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: mul16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = mul <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

; Integer multiply of <4 x i16>: the checks expect a single NEON `mul` on the
; .4h arrangement.
define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: mul4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

; Integer multiply of <8 x i16>: the checks expect a single NEON `mul` on the
; .8h arrangement.
define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: mul8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

; Integer multiply of <2 x i32>: the checks expect a single NEON `mul` on the
; .2s arrangement.
define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: mul2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

; Integer multiply of <4 x i32>: the checks expect a single NEON `mul` on the
; .4s arrangement.
define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: mul4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

; Integer multiply of <1 x i64>: no vector multiply is expected here. The
; checks expect both operands to be moved to general-purpose registers with
; `fmov`, multiplied with the scalar `mul`, and the product moved back to d0.
define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: mul1xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = mul <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

; Integer multiply of <2 x i64>: the checks expect a scalarized sequence.
; Lane 0 is read with `fmov`, lane 1 with `mov x, v.d[1]`; each pair is
; multiplied with the scalar `mul`, and the result vector is rebuilt with
; `fmov d0` followed by an insert into v0.d[1].
define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: mul2xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x10, d1
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    mov x8, v1.d[1]
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    mul x10, x11, x10
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    fmov d0, x10
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Floating-point multiply of <2 x float>: the checks expect a single vector
; `fmul` on the .2s arrangement.
define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: mul2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x float> %A, %B
  ret <2 x float> %tmp3
}

; Floating-point multiply of <4 x float>: the checks expect a single vector
; `fmul` on the .4s arrangement.
define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: mul4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fmul <4 x float> %A, %B
  ret <4 x float> %tmp3
}
; Floating-point multiply of <2 x double>: the checks expect a single vector
; `fmul` on the .2d arrangement.
define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: mul2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x double> %A, %B
  ret <2 x double> %tmp3
}


; Floating-point divide of <2 x float>: the checks expect a single vector
; `fdiv` on the .2s arrangement.
define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: div2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x float> %A, %B
  ret <2 x float> %tmp3
}

; Floating-point divide of <4 x float>: the checks expect a single vector
; `fdiv` on the .4s arrangement.
define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: div4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <4 x float> %A, %B
  ret <4 x float> %tmp3
}
; Floating-point divide of <2 x double>: the checks expect a single vector
; `fdiv` on the .2d arrangement.
define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: div2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x double> %A, %B
  ret <2 x double> %tmp3
}

; Signed divide of <1 x i8>: the checks expect the single byte lane of each
; operand to be sign-extended into a W register with `smov`, divided with the
; scalar `sdiv`, and the quotient moved back with `fmov s0`.
define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: sdiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

; Signed divide of <8 x i8>: the checks expect a fully scalarized lowering.
; Each of the eight byte lanes is extracted with `smov`, divided with the
; scalar `sdiv`, and inserted into v2; v2 is finally copied to d0.
define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: sdiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    smov w14, v0.b[5]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[7]
; CHECK-NEXT:    sdiv w8, w14, w13
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

; Signed divide of <16 x i8>: the checks expect a fully scalarized lowering.
; Each of the sixteen byte lanes is extracted with `smov`, divided with the
; scalar `sdiv`, and inserted into v2; v2 is finally copied to v0.
define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: sdiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    smov w14, v0.b[5]
; CHECK-NEXT:    smov w15, v0.b[6]
; CHECK-NEXT:    smov w16, v0.b[7]
; CHECK-NEXT:    smov w17, v0.b[8]
; CHECK-NEXT:    smov w18, v0.b[9]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[10]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[11]
; CHECK-NEXT:    sdiv w13, w14, w13
; CHECK-NEXT:    smov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    smov w12, v0.b[12]
; CHECK-NEXT:    sdiv w14, w15, w14
; CHECK-NEXT:    smov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    smov w13, v0.b[13]
; CHECK-NEXT:    sdiv w15, w16, w15
; CHECK-NEXT:    smov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    sdiv w16, w17, w16
; CHECK-NEXT:    smov w17, v1.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    sdiv w8, w18, w17
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    smov w9, v1.b[14]
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    smov w10, v1.b[15]
; CHECK-NEXT:    sdiv w8, w13, w12
; CHECK-NEXT:    smov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    smov w11, v0.b[15]
; CHECK-NEXT:    sdiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

; Signed divide of <1 x i16>: the checks expect the single halfword lane of
; each operand to be sign-extended into a W register with `smov`, divided with
; the scalar `sdiv`, and the quotient moved back with `fmov s0`.
define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: sdiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

; Signed divide of <4 x i16>: the checks expect a fully scalarized lowering.
; Each of the four halfword lanes is extracted with `smov`, divided with the
; scalar `sdiv`, and inserted directly into v0.
define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: sdiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

; Signed divide of <8 x i16>: the checks expect a fully scalarized lowering.
; Each of the eight halfword lanes is extracted with `smov`, divided with the
; scalar `sdiv`, and inserted into v2; v2 is finally copied to v0.
define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: sdiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    smov w13, v0.h[4]
; CHECK-NEXT:    smov w14, v0.h[5]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    smov w10, v0.h[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    smov w11, v0.h[7]
; CHECK-NEXT:    sdiv w8, w14, w13
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

; Signed divide of <1 x i32>: the checks expect both operands to be moved to
; W registers with `fmov`, divided with the scalar `sdiv`, and the quotient
; moved back with `fmov s0`.
define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: sdiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i32> %A, %B
  ret <1 x i32> %tmp3
}

; Signed divide of <2 x i32>: the checks expect a scalarized lowering. Lane 0
; is read with `fmov`, lane 1 with `mov w, v.s[1]`; each pair is divided with
; the scalar `sdiv` and the result is rebuilt in v0.
define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: sdiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

; Signed divide of <4 x i32>: the checks expect a scalarized lowering. Lane 0
; is read with `fmov`, lanes 1-3 with `mov w, v.s[n]`; each pair is divided
; with the scalar `sdiv` and the result is rebuilt in v0.
define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: sdiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

; Signed divide of <1 x i64>: the checks expect both operands to be moved to
; X registers with `fmov`, divided with the scalar `sdiv`, and the quotient
; moved back with `fmov d0`.
define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: sdiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

; Signed divide of <2 x i64>: the checks expect a scalarized lowering. Lane 0
; is read with `fmov`, lane 1 with `mov x, v.d[1]`; each pair is divided with
; the scalar `sdiv` and the result is rebuilt in v0.
define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: sdiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    sdiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Unsigned divide of <1 x i8>: the checks expect the single byte lane of each
; operand to be extracted into a W register with `umov`, divided with the
; scalar `udiv`, and the quotient moved back with `fmov s0`.
define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: udiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[0]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

; Unsigned divide of <8 x i8>: the checks expect a fully scalarized lowering.
; Each of the eight byte lanes is extracted with `umov`, divided with the
; scalar `udiv`, and inserted into v2; v2 is finally copied to d0.
define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: udiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    umov w14, v0.b[5]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[7]
; CHECK-NEXT:    udiv w8, w14, w13
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

; Unsigned divide of <16 x i8>: the checks expect a fully scalarized lowering.
; Each of the sixteen byte lanes is extracted with `umov`, divided with the
; scalar `udiv`, and inserted into v2; v2 is finally copied to v0.
define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: udiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    umov w14, v0.b[5]
; CHECK-NEXT:    umov w15, v0.b[6]
; CHECK-NEXT:    umov w16, v0.b[7]
; CHECK-NEXT:    umov w17, v0.b[8]
; CHECK-NEXT:    umov w18, v0.b[9]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[10]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[11]
; CHECK-NEXT:    udiv w13, w14, w13
; CHECK-NEXT:    umov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    umov w12, v0.b[12]
; CHECK-NEXT:    udiv w14, w15, w14
; CHECK-NEXT:    umov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    umov w13, v0.b[13]
; CHECK-NEXT:    udiv w15, w16, w15
; CHECK-NEXT:    umov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    udiv w16, w17, w16
; CHECK-NEXT:    umov w17, v1.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    udiv w8, w18, w17
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    umov w9, v1.b[14]
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    umov w10, v1.b[15]
; CHECK-NEXT:    udiv w8, w13, w12
; CHECK-NEXT:    umov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    umov w11, v0.b[15]
; CHECK-NEXT:    udiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

; Unsigned divide of <1 x i16>: the checks expect the single halfword lane of
; each operand to be extracted into a W register with `umov`, divided with the
; scalar `udiv`, and the quotient moved back with `fmov s0`.
define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: udiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[0]
; CHECK-NEXT:    umov w9, v0.h[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

; Unsigned divide of <4 x i16>: the checks expect a fully scalarized lowering.
; Each of the four halfword lanes is extracted with `umov`, divided with the
; scalar `udiv`, and inserted directly into v0.
define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: udiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

; Unsigned divide of <8 x i16>: the checks expect a fully scalarized lowering.
; Each of the eight halfword lanes is extracted with `umov`, divided with the
; scalar `udiv`, and inserted into v2; v2 is finally copied to v0.
define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: udiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    umov w13, v0.h[4]
; CHECK-NEXT:    umov w14, v0.h[5]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    umov w10, v0.h[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    umov w11, v0.h[7]
; CHECK-NEXT:    udiv w8, w14, w13
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

; Unsigned divide of <1 x i32>: the checks expect both operands to be moved to
; W registers with `fmov`, divided with the scalar `udiv`, and the quotient
; moved back with `fmov s0`.
define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: udiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i32> %A, %B
  ret <1 x i32> %tmp3
}

; Unsigned divide of <2 x i32>: the checks expect a scalarized lowering. Lane
; 0 is read with `fmov`, lane 1 with `mov w, v.s[1]`; each pair is divided
; with the scalar `udiv` and the result is rebuilt in v0.
define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: udiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

; Unsigned divide of <4 x i32>: the checks expect a scalarized lowering. Lane
; 0 is read with `fmov`, lanes 1-3 with `mov w, v.s[n]`; each pair is divided
; with the scalar `udiv` and the result is rebuilt in v0.
define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: udiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

; Unsigned divide of <1 x i64>: the checks expect both operands to be moved to
; X registers with `fmov`, divided with the scalar `udiv`, and the quotient
; moved back with `fmov d0`.
define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: udiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

; Unsigned divide of <2 x i64>: the checks expect a scalarized lowering. Lane
; 0 is read with `fmov`, lane 1 with `mov x, v.d[1]`; each pair is divided
; with the scalar `udiv` and the result is rebuilt in v0.
define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: udiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    udiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Signed remainder of <1 x i8>: the checks expect the byte lane of each
; operand to be extracted with `smov`, then the remainder formed as
; a - (a / b) * b using scalar `sdiv` followed by `msub`.
define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: srem1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

; Signed remainder of <8 x i8>: the checks expect a fully scalarized lowering.
; Each byte lane is extracted with `smov`, the remainder is formed per lane
; with `sdiv` + `msub`, and the results are inserted into v2 before being
; copied to d0.
define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: srem8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    smov w4, v1.b[5]
; CHECK-NEXT:    smov w5, v0.b[5]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w13, v1.b[7]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[6]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w14, v0.b[7]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    msub w8, w0, w17, w18
; CHECK-NEXT:    mov v2.b[3], w8
; CHECK-NEXT:    sdiv w9, w5, w4
; CHECK-NEXT:    msub w8, w3, w1, w2
; CHECK-NEXT:    mov v2.b[4], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w4, w5
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w9, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    msub w8, w9, w13, w14
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = srem <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

; Signed remainder of <16 x i8>: the checks expect a fully scalarized
; lowering, with each byte lane extracted via `smov` and the remainder formed
; per lane with `sdiv` + `msub`. The expected code also saves and restores
; x19-x28 on the stack (an 80-byte frame built with stp/ldp pairs and described
; by the matching .cfi directives), because w19-w28 are used as temporaries.
define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: srem16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w22, -32
; CHECK-NEXT:    .cfi_offset w23, -40
; CHECK-NEXT:    .cfi_offset w24, -48
; CHECK-NEXT:    .cfi_offset w25, -56
; CHECK-NEXT:    .cfi_offset w26, -64
; CHECK-NEXT:    .cfi_offset w27, -72
; CHECK-NEXT:    .cfi_offset w28, -80
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    smov w4, v1.b[5]
; CHECK-NEXT:    smov w5, v0.b[5]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    smov w7, v1.b[6]
; CHECK-NEXT:    smov w19, v0.b[6]
; CHECK-NEXT:    smov w21, v1.b[7]
; CHECK-NEXT:    smov w22, v0.b[7]
; CHECK-NEXT:    smov w24, v1.b[8]
; CHECK-NEXT:    smov w25, v0.b[8]
; CHECK-NEXT:    smov w27, v1.b[9]
; CHECK-NEXT:    smov w28, v0.b[9]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w13, v1.b[11]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[10]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w14, v0.b[11]
; CHECK-NEXT:    smov w16, v1.b[12]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    msub w8, w0, w17, w18
; CHECK-NEXT:    smov w17, v0.b[12]
; CHECK-NEXT:    smov w0, v1.b[13]
; CHECK-NEXT:    mov v2.b[3], w8
; CHECK-NEXT:    sdiv w6, w5, w4
; CHECK-NEXT:    msub w8, w3, w1, w2
; CHECK-NEXT:    smov w1, v0.b[13]
; CHECK-NEXT:    mov v2.b[4], w8
; CHECK-NEXT:    sdiv w20, w19, w7
; CHECK-NEXT:    msub w8, w6, w4, w5
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w23, w22, w21
; CHECK-NEXT:    msub w8, w20, w7, w19
; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    sdiv w26, w25, w24
; CHECK-NEXT:    msub w8, w23, w21, w22
; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    sdiv w9, w28, w27
; CHECK-NEXT:    msub w8, w26, w24, w25
; CHECK-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[8], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w27, w28
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    sdiv w15, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    smov w10, v1.b[14]
; CHECK-NEXT:    smov w11, v0.b[14]
; CHECK-NEXT:    mov v2.b[10], w8
; CHECK-NEXT:    sdiv w18, w17, w16
; CHECK-NEXT:    msub w8, w15, w13, w14
; CHECK-NEXT:    smov w13, v1.b[15]
; CHECK-NEXT:    smov w14, v0.b[15]
; CHECK-NEXT:    mov v2.b[11], w8
; CHECK-NEXT:    sdiv w9, w1, w0
; CHECK-NEXT:    msub w8, w18, w16, w17
; CHECK-NEXT:    mov v2.b[12], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w0, w1
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    sdiv w9, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    mov v2.b[14], w8
; CHECK-NEXT:    msub w8, w9, w13, w14
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %tmp3 = srem <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

; Signed remainder of <1 x i16>: the checks expect the halfword lane of each
; operand to be extracted with `smov`, then the remainder formed as
; a - (a / b) * b using scalar `sdiv` followed by `msub`.
define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: srem1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

; Signed remainder of <4 x i16>. The expected output is fully scalarized:
; all four lane pairs are extracted with smov, each pair goes through
; sdiv + msub, and the remainders are inserted lane by lane into v0.
931define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
932; CHECK-LABEL: srem4x16:
933; CHECK:       // %bb.0:
934; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
935; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
936; CHECK-NEXT:    smov w11, v1.h[0]
937; CHECK-NEXT:    smov w12, v0.h[0]
938; CHECK-NEXT:    smov w8, v1.h[1]
939; CHECK-NEXT:    smov w9, v0.h[1]
940; CHECK-NEXT:    smov w14, v1.h[2]
941; CHECK-NEXT:    smov w15, v0.h[2]
942; CHECK-NEXT:    smov w17, v1.h[3]
943; CHECK-NEXT:    smov w18, v0.h[3]
944; CHECK-NEXT:    sdiv w13, w12, w11
945; CHECK-NEXT:    sdiv w10, w9, w8
946; CHECK-NEXT:    msub w11, w13, w11, w12
947; CHECK-NEXT:    fmov s0, w11
948; CHECK-NEXT:    sdiv w16, w15, w14
949; CHECK-NEXT:    msub w8, w10, w8, w9
950; CHECK-NEXT:    mov v0.h[1], w8
951; CHECK-NEXT:    sdiv w9, w18, w17
952; CHECK-NEXT:    msub w8, w16, w14, w15
953; CHECK-NEXT:    mov v0.h[2], w8
954; CHECK-NEXT:    msub w8, w9, w17, w18
955; CHECK-NEXT:    mov v0.h[3], w8
956; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
957; CHECK-NEXT:    ret
958	%tmp3 = srem <4 x i16> %A, %B;
959	ret <4 x i16> %tmp3
960}
961
; Signed remainder of <8 x i16>. The expected output is scalarized per lane
; (smov extract, sdiv, msub); the eight remainders are assembled in v2 and
; the finished vector is copied to v0 for the return.
962define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
963; CHECK-LABEL: srem8x16:
964; CHECK:       // %bb.0:
965; CHECK-NEXT:    smov w11, v1.h[0]
966; CHECK-NEXT:    smov w12, v0.h[0]
967; CHECK-NEXT:    smov w8, v1.h[1]
968; CHECK-NEXT:    smov w9, v0.h[1]
969; CHECK-NEXT:    smov w14, v1.h[2]
970; CHECK-NEXT:    smov w15, v0.h[2]
971; CHECK-NEXT:    smov w17, v1.h[3]
972; CHECK-NEXT:    smov w18, v0.h[3]
973; CHECK-NEXT:    smov w1, v1.h[4]
974; CHECK-NEXT:    smov w2, v0.h[4]
975; CHECK-NEXT:    smov w4, v1.h[5]
976; CHECK-NEXT:    smov w5, v0.h[5]
977; CHECK-NEXT:    sdiv w13, w12, w11
978; CHECK-NEXT:    sdiv w10, w9, w8
979; CHECK-NEXT:    msub w11, w13, w11, w12
980; CHECK-NEXT:    smov w13, v1.h[7]
981; CHECK-NEXT:    fmov s2, w11
982; CHECK-NEXT:    smov w11, v0.h[6]
983; CHECK-NEXT:    sdiv w16, w15, w14
984; CHECK-NEXT:    msub w8, w10, w8, w9
985; CHECK-NEXT:    smov w10, v1.h[6]
986; CHECK-NEXT:    mov v2.h[1], w8
987; CHECK-NEXT:    sdiv w0, w18, w17
988; CHECK-NEXT:    msub w8, w16, w14, w15
989; CHECK-NEXT:    smov w14, v0.h[7]
990; CHECK-NEXT:    mov v2.h[2], w8
991; CHECK-NEXT:    sdiv w3, w2, w1
992; CHECK-NEXT:    msub w8, w0, w17, w18
993; CHECK-NEXT:    mov v2.h[3], w8
994; CHECK-NEXT:    sdiv w9, w5, w4
995; CHECK-NEXT:    msub w8, w3, w1, w2
996; CHECK-NEXT:    mov v2.h[4], w8
997; CHECK-NEXT:    sdiv w12, w11, w10
998; CHECK-NEXT:    msub w8, w9, w4, w5
999; CHECK-NEXT:    mov v2.h[5], w8
1000; CHECK-NEXT:    sdiv w9, w14, w13
1001; CHECK-NEXT:    msub w8, w12, w10, w11
1002; CHECK-NEXT:    mov v2.h[6], w8
1003; CHECK-NEXT:    msub w8, w9, w13, w14
1004; CHECK-NEXT:    mov v2.h[7], w8
1005; CHECK-NEXT:    mov v0.16b, v2.16b
1006; CHECK-NEXT:    ret
1007	%tmp3 = srem <8 x i16> %A, %B;
1008	ret <8 x i16> %tmp3
1009}
1010
; Signed remainder of <1 x i32>. The 32-bit lane needs no extension, so the
; expected output moves the operands to GPRs with fmov, then uses
; sdiv + msub and moves the remainder back with fmov.
1011define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
1012; CHECK-LABEL: srem1x32:
1013; CHECK:       // %bb.0:
1014; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1015; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1016; CHECK-NEXT:    fmov w8, s1
1017; CHECK-NEXT:    fmov w9, s0
1018; CHECK-NEXT:    sdiv w10, w9, w8
1019; CHECK-NEXT:    msub w8, w10, w8, w9
1020; CHECK-NEXT:    fmov s0, w8
1021; CHECK-NEXT:    ret
1022	%tmp3 = srem <1 x i32> %A, %B;
1023	ret <1 x i32> %tmp3
1024}
1025
; Signed remainder of <2 x i32>. Lane 0 is read with fmov and lane 1 with a
; lane mov; each pair goes through sdiv + msub and the two remainders are
; written back into v0.
1026define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
1027; CHECK-LABEL: srem2x32:
1028; CHECK:       // %bb.0:
1029; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1030; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1031; CHECK-NEXT:    fmov w8, s1
1032; CHECK-NEXT:    fmov w9, s0
1033; CHECK-NEXT:    mov w11, v1.s[1]
1034; CHECK-NEXT:    mov w12, v0.s[1]
1035; CHECK-NEXT:    sdiv w10, w9, w8
1036; CHECK-NEXT:    sdiv w13, w12, w11
1037; CHECK-NEXT:    msub w8, w10, w8, w9
1038; CHECK-NEXT:    fmov s0, w8
1039; CHECK-NEXT:    msub w9, w13, w11, w12
1040; CHECK-NEXT:    mov v0.s[1], w9
1041; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1042; CHECK-NEXT:    ret
1043	%tmp3 = srem <2 x i32> %A, %B;
1044	ret <2 x i32> %tmp3
1045}
1046
; Signed remainder of <4 x i32>. The expected output extracts all four lane
; pairs to GPRs, runs sdiv + msub on each pair, and inserts the remainders
; into v0 one lane at a time.
1047define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
1048; CHECK-LABEL: srem4x32:
1049; CHECK:       // %bb.0:
1050; CHECK-NEXT:    fmov w11, s1
1051; CHECK-NEXT:    fmov w12, s0
1052; CHECK-NEXT:    mov w8, v1.s[1]
1053; CHECK-NEXT:    mov w9, v0.s[1]
1054; CHECK-NEXT:    mov w14, v1.s[2]
1055; CHECK-NEXT:    mov w15, v0.s[2]
1056; CHECK-NEXT:    mov w17, v1.s[3]
1057; CHECK-NEXT:    mov w18, v0.s[3]
1058; CHECK-NEXT:    sdiv w13, w12, w11
1059; CHECK-NEXT:    sdiv w10, w9, w8
1060; CHECK-NEXT:    msub w11, w13, w11, w12
1061; CHECK-NEXT:    fmov s0, w11
1062; CHECK-NEXT:    sdiv w16, w15, w14
1063; CHECK-NEXT:    msub w8, w10, w8, w9
1064; CHECK-NEXT:    mov v0.s[1], w8
1065; CHECK-NEXT:    sdiv w9, w18, w17
1066; CHECK-NEXT:    msub w8, w16, w14, w15
1067; CHECK-NEXT:    mov v0.s[2], w8
1068; CHECK-NEXT:    msub w8, w9, w17, w18
1069; CHECK-NEXT:    mov v0.s[3], w8
1070; CHECK-NEXT:    ret
1071	%tmp3 = srem <4 x i32> %A, %B;
1072	ret <4 x i32> %tmp3
1073}
1074
; Signed remainder of <1 x i64>. Same shape as the 32-bit single-lane case,
; but on 64-bit x registers: fmov to GPRs, sdiv + msub, fmov back to d0.
1075define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
1076; CHECK-LABEL: srem1x64:
1077; CHECK:       // %bb.0:
1078; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1079; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1080; CHECK-NEXT:    fmov x8, d1
1081; CHECK-NEXT:    fmov x9, d0
1082; CHECK-NEXT:    sdiv x10, x9, x8
1083; CHECK-NEXT:    msub x8, x10, x8, x9
1084; CHECK-NEXT:    fmov d0, x8
1085; CHECK-NEXT:    ret
1086	%tmp3 = srem <1 x i64> %A, %B;
1087	ret <1 x i64> %tmp3
1088}
1089
; Signed remainder of <2 x i64>. Both 64-bit lane pairs are moved to x
; registers, each goes through sdiv + msub, and the remainders are written
; back to d0 and v0.d[1].
1090define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
1091; CHECK-LABEL: srem2x64:
1092; CHECK:       // %bb.0:
1093; CHECK-NEXT:    fmov x8, d1
1094; CHECK-NEXT:    fmov x9, d0
1095; CHECK-NEXT:    mov x11, v1.d[1]
1096; CHECK-NEXT:    mov x12, v0.d[1]
1097; CHECK-NEXT:    sdiv x10, x9, x8
1098; CHECK-NEXT:    sdiv x13, x12, x11
1099; CHECK-NEXT:    msub x8, x10, x8, x9
1100; CHECK-NEXT:    fmov d0, x8
1101; CHECK-NEXT:    msub x9, x13, x11, x12
1102; CHECK-NEXT:    mov v0.d[1], x9
1103; CHECK-NEXT:    ret
1104	%tmp3 = srem <2 x i64> %A, %B;
1105	ret <2 x i64> %tmp3
1106}
1107
; Unsigned remainder of <1 x i8>. The expected output zero-extends the
; single byte lane of each operand into a GPR with umov, forms the quotient
; with udiv, recovers the remainder with msub, and returns it through fmov.
1108define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
1109; CHECK-LABEL: urem1x8:
1110; CHECK:       // %bb.0:
1111; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1112; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1113; CHECK-NEXT:    umov w8, v1.b[0]
1114; CHECK-NEXT:    umov w9, v0.b[0]
1115; CHECK-NEXT:    udiv w10, w9, w8
1116; CHECK-NEXT:    msub w8, w10, w8, w9
1117; CHECK-NEXT:    fmov s0, w8
1118; CHECK-NEXT:    ret
1119	%tmp3 = urem <1 x i8> %A, %B;
1120	ret <1 x i8> %tmp3
1121}
1122
; Unsigned remainder of <8 x i8>. The expected output is scalarized per lane
; (umov extract, udiv, msub); the eight remainders are assembled in v2 and
; the 64-bit result is copied to d0 for the return.
1123define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
1124; CHECK-LABEL: urem8x8:
1125; CHECK:       // %bb.0:
1126; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1127; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1128; CHECK-NEXT:    umov w11, v1.b[0]
1129; CHECK-NEXT:    umov w12, v0.b[0]
1130; CHECK-NEXT:    umov w8, v1.b[1]
1131; CHECK-NEXT:    umov w9, v0.b[1]
1132; CHECK-NEXT:    umov w14, v1.b[2]
1133; CHECK-NEXT:    umov w15, v0.b[2]
1134; CHECK-NEXT:    umov w17, v1.b[3]
1135; CHECK-NEXT:    umov w18, v0.b[3]
1136; CHECK-NEXT:    umov w1, v1.b[4]
1137; CHECK-NEXT:    umov w2, v0.b[4]
1138; CHECK-NEXT:    umov w4, v1.b[5]
1139; CHECK-NEXT:    umov w5, v0.b[5]
1140; CHECK-NEXT:    udiv w13, w12, w11
1141; CHECK-NEXT:    udiv w10, w9, w8
1142; CHECK-NEXT:    msub w11, w13, w11, w12
1143; CHECK-NEXT:    umov w13, v1.b[7]
1144; CHECK-NEXT:    fmov s2, w11
1145; CHECK-NEXT:    umov w11, v0.b[6]
1146; CHECK-NEXT:    udiv w16, w15, w14
1147; CHECK-NEXT:    msub w8, w10, w8, w9
1148; CHECK-NEXT:    umov w10, v1.b[6]
1149; CHECK-NEXT:    mov v2.b[1], w8
1150; CHECK-NEXT:    udiv w0, w18, w17
1151; CHECK-NEXT:    msub w8, w16, w14, w15
1152; CHECK-NEXT:    umov w14, v0.b[7]
1153; CHECK-NEXT:    mov v2.b[2], w8
1154; CHECK-NEXT:    udiv w3, w2, w1
1155; CHECK-NEXT:    msub w8, w0, w17, w18
1156; CHECK-NEXT:    mov v2.b[3], w8
1157; CHECK-NEXT:    udiv w9, w5, w4
1158; CHECK-NEXT:    msub w8, w3, w1, w2
1159; CHECK-NEXT:    mov v2.b[4], w8
1160; CHECK-NEXT:    udiv w12, w11, w10
1161; CHECK-NEXT:    msub w8, w9, w4, w5
1162; CHECK-NEXT:    mov v2.b[5], w8
1163; CHECK-NEXT:    udiv w9, w14, w13
1164; CHECK-NEXT:    msub w8, w12, w10, w11
1165; CHECK-NEXT:    mov v2.b[6], w8
1166; CHECK-NEXT:    msub w8, w9, w13, w14
1167; CHECK-NEXT:    mov v2.b[7], w8
1168; CHECK-NEXT:    fmov d0, d2
1169; CHECK-NEXT:    ret
1170	%tmp3 = urem <8 x i8> %A, %B;
1171	ret <8 x i8> %tmp3
1172}
1173
; Unsigned remainder of <16 x i8>. The expected output is scalarized over
; all sixteen lanes (umov extract, udiv, msub, lane insert into v2). The
; expansion uses x19-x28 as extra scratch registers, so the prologue spills
; those ten registers to an 80-byte frame and they are reloaded before the
; return. The finished vector is copied from v2 to v0.
1174define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
1175; CHECK-LABEL: urem16x8:
1176; CHECK:       // %bb.0:
1177; CHECK-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
1178; CHECK-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
1179; CHECK-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
1180; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
1181; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
1182; CHECK-NEXT:    .cfi_def_cfa_offset 80
1183; CHECK-NEXT:    .cfi_offset w19, -8
1184; CHECK-NEXT:    .cfi_offset w20, -16
1185; CHECK-NEXT:    .cfi_offset w21, -24
1186; CHECK-NEXT:    .cfi_offset w22, -32
1187; CHECK-NEXT:    .cfi_offset w23, -40
1188; CHECK-NEXT:    .cfi_offset w24, -48
1189; CHECK-NEXT:    .cfi_offset w25, -56
1190; CHECK-NEXT:    .cfi_offset w26, -64
1191; CHECK-NEXT:    .cfi_offset w27, -72
1192; CHECK-NEXT:    .cfi_offset w28, -80
1193; CHECK-NEXT:    umov w11, v1.b[0]
1194; CHECK-NEXT:    umov w12, v0.b[0]
1195; CHECK-NEXT:    umov w8, v1.b[1]
1196; CHECK-NEXT:    umov w9, v0.b[1]
1197; CHECK-NEXT:    umov w14, v1.b[2]
1198; CHECK-NEXT:    umov w15, v0.b[2]
1199; CHECK-NEXT:    umov w17, v1.b[3]
1200; CHECK-NEXT:    umov w18, v0.b[3]
1201; CHECK-NEXT:    umov w1, v1.b[4]
1202; CHECK-NEXT:    umov w2, v0.b[4]
1203; CHECK-NEXT:    umov w4, v1.b[5]
1204; CHECK-NEXT:    umov w5, v0.b[5]
1205; CHECK-NEXT:    udiv w13, w12, w11
1206; CHECK-NEXT:    umov w7, v1.b[6]
1207; CHECK-NEXT:    umov w19, v0.b[6]
1208; CHECK-NEXT:    umov w21, v1.b[7]
1209; CHECK-NEXT:    umov w22, v0.b[7]
1210; CHECK-NEXT:    umov w24, v1.b[8]
1211; CHECK-NEXT:    umov w25, v0.b[8]
1212; CHECK-NEXT:    umov w27, v1.b[9]
1213; CHECK-NEXT:    umov w28, v0.b[9]
1214; CHECK-NEXT:    udiv w10, w9, w8
1215; CHECK-NEXT:    msub w11, w13, w11, w12
1216; CHECK-NEXT:    umov w13, v1.b[11]
1217; CHECK-NEXT:    fmov s2, w11
1218; CHECK-NEXT:    umov w11, v0.b[10]
1219; CHECK-NEXT:    udiv w16, w15, w14
1220; CHECK-NEXT:    msub w8, w10, w8, w9
1221; CHECK-NEXT:    umov w10, v1.b[10]
1222; CHECK-NEXT:    mov v2.b[1], w8
1223; CHECK-NEXT:    udiv w0, w18, w17
1224; CHECK-NEXT:    msub w8, w16, w14, w15
1225; CHECK-NEXT:    umov w14, v0.b[11]
1226; CHECK-NEXT:    umov w16, v1.b[12]
1227; CHECK-NEXT:    mov v2.b[2], w8
1228; CHECK-NEXT:    udiv w3, w2, w1
1229; CHECK-NEXT:    msub w8, w0, w17, w18
1230; CHECK-NEXT:    umov w17, v0.b[12]
1231; CHECK-NEXT:    umov w0, v1.b[13]
1232; CHECK-NEXT:    mov v2.b[3], w8
1233; CHECK-NEXT:    udiv w6, w5, w4
1234; CHECK-NEXT:    msub w8, w3, w1, w2
1235; CHECK-NEXT:    umov w1, v0.b[13]
1236; CHECK-NEXT:    mov v2.b[4], w8
1237; CHECK-NEXT:    udiv w20, w19, w7
1238; CHECK-NEXT:    msub w8, w6, w4, w5
1239; CHECK-NEXT:    mov v2.b[5], w8
1240; CHECK-NEXT:    udiv w23, w22, w21
1241; CHECK-NEXT:    msub w8, w20, w7, w19
1242; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
1243; CHECK-NEXT:    mov v2.b[6], w8
1244; CHECK-NEXT:    udiv w26, w25, w24
1245; CHECK-NEXT:    msub w8, w23, w21, w22
1246; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
1247; CHECK-NEXT:    mov v2.b[7], w8
1248; CHECK-NEXT:    udiv w9, w28, w27
1249; CHECK-NEXT:    msub w8, w26, w24, w25
1250; CHECK-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
1251; CHECK-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
1252; CHECK-NEXT:    mov v2.b[8], w8
1253; CHECK-NEXT:    udiv w12, w11, w10
1254; CHECK-NEXT:    msub w8, w9, w27, w28
1255; CHECK-NEXT:    mov v2.b[9], w8
1256; CHECK-NEXT:    udiv w15, w14, w13
1257; CHECK-NEXT:    msub w8, w12, w10, w11
1258; CHECK-NEXT:    umov w10, v1.b[14]
1259; CHECK-NEXT:    umov w11, v0.b[14]
1260; CHECK-NEXT:    mov v2.b[10], w8
1261; CHECK-NEXT:    udiv w18, w17, w16
1262; CHECK-NEXT:    msub w8, w15, w13, w14
1263; CHECK-NEXT:    umov w13, v1.b[15]
1264; CHECK-NEXT:    umov w14, v0.b[15]
1265; CHECK-NEXT:    mov v2.b[11], w8
1266; CHECK-NEXT:    udiv w9, w1, w0
1267; CHECK-NEXT:    msub w8, w18, w16, w17
1268; CHECK-NEXT:    mov v2.b[12], w8
1269; CHECK-NEXT:    udiv w12, w11, w10
1270; CHECK-NEXT:    msub w8, w9, w0, w1
1271; CHECK-NEXT:    mov v2.b[13], w8
1272; CHECK-NEXT:    udiv w9, w14, w13
1273; CHECK-NEXT:    msub w8, w12, w10, w11
1274; CHECK-NEXT:    mov v2.b[14], w8
1275; CHECK-NEXT:    msub w8, w9, w13, w14
1276; CHECK-NEXT:    mov v2.b[15], w8
1277; CHECK-NEXT:    mov v0.16b, v2.16b
1278; CHECK-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
1279; CHECK-NEXT:    ret
1280	%tmp3 = urem <16 x i8> %A, %B;
1281	ret <16 x i8> %tmp3
1282}
1283
; Unsigned remainder of <1 x i16>. The expected output zero-extends the
; single halfword lane of each operand with umov, then uses udiv + msub and
; returns the remainder through fmov.
1284define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
1285; CHECK-LABEL: urem1x16:
1286; CHECK:       // %bb.0:
1287; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1288; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1289; CHECK-NEXT:    umov w8, v1.h[0]
1290; CHECK-NEXT:    umov w9, v0.h[0]
1291; CHECK-NEXT:    udiv w10, w9, w8
1292; CHECK-NEXT:    msub w8, w10, w8, w9
1293; CHECK-NEXT:    fmov s0, w8
1294; CHECK-NEXT:    ret
1295	%tmp3 = urem <1 x i16> %A, %B;
1296	ret <1 x i16> %tmp3
1297}
1298
; Unsigned remainder of <4 x i16>. The expected output is fully scalarized:
; all four lane pairs are extracted with umov, each pair goes through
; udiv + msub, and the remainders are inserted lane by lane into v0.
1299define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
1300; CHECK-LABEL: urem4x16:
1301; CHECK:       // %bb.0:
1302; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1303; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1304; CHECK-NEXT:    umov w11, v1.h[0]
1305; CHECK-NEXT:    umov w12, v0.h[0]
1306; CHECK-NEXT:    umov w8, v1.h[1]
1307; CHECK-NEXT:    umov w9, v0.h[1]
1308; CHECK-NEXT:    umov w14, v1.h[2]
1309; CHECK-NEXT:    umov w15, v0.h[2]
1310; CHECK-NEXT:    umov w17, v1.h[3]
1311; CHECK-NEXT:    umov w18, v0.h[3]
1312; CHECK-NEXT:    udiv w13, w12, w11
1313; CHECK-NEXT:    udiv w10, w9, w8
1314; CHECK-NEXT:    msub w11, w13, w11, w12
1315; CHECK-NEXT:    fmov s0, w11
1316; CHECK-NEXT:    udiv w16, w15, w14
1317; CHECK-NEXT:    msub w8, w10, w8, w9
1318; CHECK-NEXT:    mov v0.h[1], w8
1319; CHECK-NEXT:    udiv w9, w18, w17
1320; CHECK-NEXT:    msub w8, w16, w14, w15
1321; CHECK-NEXT:    mov v0.h[2], w8
1322; CHECK-NEXT:    msub w8, w9, w17, w18
1323; CHECK-NEXT:    mov v0.h[3], w8
1324; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1325; CHECK-NEXT:    ret
1326	%tmp3 = urem <4 x i16> %A, %B;
1327	ret <4 x i16> %tmp3
1328}
1329
; Unsigned remainder of <8 x i16>. The expected output is scalarized per
; lane (umov extract, udiv, msub); the eight remainders are assembled in v2
; and the finished vector is copied to v0 for the return.
1330define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
1331; CHECK-LABEL: urem8x16:
1332; CHECK:       // %bb.0:
1333; CHECK-NEXT:    umov w11, v1.h[0]
1334; CHECK-NEXT:    umov w12, v0.h[0]
1335; CHECK-NEXT:    umov w8, v1.h[1]
1336; CHECK-NEXT:    umov w9, v0.h[1]
1337; CHECK-NEXT:    umov w14, v1.h[2]
1338; CHECK-NEXT:    umov w15, v0.h[2]
1339; CHECK-NEXT:    umov w17, v1.h[3]
1340; CHECK-NEXT:    umov w18, v0.h[3]
1341; CHECK-NEXT:    umov w1, v1.h[4]
1342; CHECK-NEXT:    umov w2, v0.h[4]
1343; CHECK-NEXT:    umov w4, v1.h[5]
1344; CHECK-NEXT:    umov w5, v0.h[5]
1345; CHECK-NEXT:    udiv w13, w12, w11
1346; CHECK-NEXT:    udiv w10, w9, w8
1347; CHECK-NEXT:    msub w11, w13, w11, w12
1348; CHECK-NEXT:    umov w13, v1.h[7]
1349; CHECK-NEXT:    fmov s2, w11
1350; CHECK-NEXT:    umov w11, v0.h[6]
1351; CHECK-NEXT:    udiv w16, w15, w14
1352; CHECK-NEXT:    msub w8, w10, w8, w9
1353; CHECK-NEXT:    umov w10, v1.h[6]
1354; CHECK-NEXT:    mov v2.h[1], w8
1355; CHECK-NEXT:    udiv w0, w18, w17
1356; CHECK-NEXT:    msub w8, w16, w14, w15
1357; CHECK-NEXT:    umov w14, v0.h[7]
1358; CHECK-NEXT:    mov v2.h[2], w8
1359; CHECK-NEXT:    udiv w3, w2, w1
1360; CHECK-NEXT:    msub w8, w0, w17, w18
1361; CHECK-NEXT:    mov v2.h[3], w8
1362; CHECK-NEXT:    udiv w9, w5, w4
1363; CHECK-NEXT:    msub w8, w3, w1, w2
1364; CHECK-NEXT:    mov v2.h[4], w8
1365; CHECK-NEXT:    udiv w12, w11, w10
1366; CHECK-NEXT:    msub w8, w9, w4, w5
1367; CHECK-NEXT:    mov v2.h[5], w8
1368; CHECK-NEXT:    udiv w9, w14, w13
1369; CHECK-NEXT:    msub w8, w12, w10, w11
1370; CHECK-NEXT:    mov v2.h[6], w8
1371; CHECK-NEXT:    msub w8, w9, w13, w14
1372; CHECK-NEXT:    mov v2.h[7], w8
1373; CHECK-NEXT:    mov v0.16b, v2.16b
1374; CHECK-NEXT:    ret
1375	%tmp3 = urem <8 x i16> %A, %B;
1376	ret <8 x i16> %tmp3
1377}
1378
; Unsigned remainder of <1 x i32>. The 32-bit lane needs no extension, so
; the expected output moves the operands to GPRs with fmov, then uses
; udiv + msub and moves the remainder back with fmov.
1379define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
1380; CHECK-LABEL: urem1x32:
1381; CHECK:       // %bb.0:
1382; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1383; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1384; CHECK-NEXT:    fmov w8, s1
1385; CHECK-NEXT:    fmov w9, s0
1386; CHECK-NEXT:    udiv w10, w9, w8
1387; CHECK-NEXT:    msub w8, w10, w8, w9
1388; CHECK-NEXT:    fmov s0, w8
1389; CHECK-NEXT:    ret
1390	%tmp3 = urem <1 x i32> %A, %B;
1391	ret <1 x i32> %tmp3
1392}
1393
; Unsigned remainder of <2 x i32>. Lane 0 is read with fmov and lane 1 with
; a lane mov; each pair goes through udiv + msub and the two remainders are
; written back into v0.
1394define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
1395; CHECK-LABEL: urem2x32:
1396; CHECK:       // %bb.0:
1397; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1398; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1399; CHECK-NEXT:    fmov w8, s1
1400; CHECK-NEXT:    fmov w9, s0
1401; CHECK-NEXT:    mov w11, v1.s[1]
1402; CHECK-NEXT:    mov w12, v0.s[1]
1403; CHECK-NEXT:    udiv w10, w9, w8
1404; CHECK-NEXT:    udiv w13, w12, w11
1405; CHECK-NEXT:    msub w8, w10, w8, w9
1406; CHECK-NEXT:    fmov s0, w8
1407; CHECK-NEXT:    msub w9, w13, w11, w12
1408; CHECK-NEXT:    mov v0.s[1], w9
1409; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1410; CHECK-NEXT:    ret
1411	%tmp3 = urem <2 x i32> %A, %B;
1412	ret <2 x i32> %tmp3
1413}
1414
; Unsigned remainder of <4 x i32>. The expected output extracts all four
; lane pairs to GPRs, runs udiv + msub on each pair, and inserts the
; remainders into v0 one lane at a time.
1415define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
1416; CHECK-LABEL: urem4x32:
1417; CHECK:       // %bb.0:
1418; CHECK-NEXT:    fmov w11, s1
1419; CHECK-NEXT:    fmov w12, s0
1420; CHECK-NEXT:    mov w8, v1.s[1]
1421; CHECK-NEXT:    mov w9, v0.s[1]
1422; CHECK-NEXT:    mov w14, v1.s[2]
1423; CHECK-NEXT:    mov w15, v0.s[2]
1424; CHECK-NEXT:    mov w17, v1.s[3]
1425; CHECK-NEXT:    mov w18, v0.s[3]
1426; CHECK-NEXT:    udiv w13, w12, w11
1427; CHECK-NEXT:    udiv w10, w9, w8
1428; CHECK-NEXT:    msub w11, w13, w11, w12
1429; CHECK-NEXT:    fmov s0, w11
1430; CHECK-NEXT:    udiv w16, w15, w14
1431; CHECK-NEXT:    msub w8, w10, w8, w9
1432; CHECK-NEXT:    mov v0.s[1], w8
1433; CHECK-NEXT:    udiv w9, w18, w17
1434; CHECK-NEXT:    msub w8, w16, w14, w15
1435; CHECK-NEXT:    mov v0.s[2], w8
1436; CHECK-NEXT:    msub w8, w9, w17, w18
1437; CHECK-NEXT:    mov v0.s[3], w8
1438; CHECK-NEXT:    ret
1439	%tmp3 = urem <4 x i32> %A, %B;
1440	ret <4 x i32> %tmp3
1441}
1442
; Unsigned remainder of <1 x i64>. Same shape as the 32-bit single-lane
; case, but on 64-bit x registers: fmov to GPRs, udiv + msub, fmov back to
; d0.
1443define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
1444; CHECK-LABEL: urem1x64:
1445; CHECK:       // %bb.0:
1446; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1447; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1448; CHECK-NEXT:    fmov x8, d1
1449; CHECK-NEXT:    fmov x9, d0
1450; CHECK-NEXT:    udiv x10, x9, x8
1451; CHECK-NEXT:    msub x8, x10, x8, x9
1452; CHECK-NEXT:    fmov d0, x8
1453; CHECK-NEXT:    ret
1454	%tmp3 = urem <1 x i64> %A, %B;
1455	ret <1 x i64> %tmp3
1456}
1457
; Unsigned remainder of <2 x i64>. Both 64-bit lane pairs are moved to x
; registers, each goes through udiv + msub, and the remainders are written
; back to d0 and v0.d[1].
1458define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) {
1459; CHECK-LABEL: urem2x64:
1460; CHECK:       // %bb.0:
1461; CHECK-NEXT:    fmov x8, d1
1462; CHECK-NEXT:    fmov x9, d0
1463; CHECK-NEXT:    mov x11, v1.d[1]
1464; CHECK-NEXT:    mov x12, v0.d[1]
1465; CHECK-NEXT:    udiv x10, x9, x8
1466; CHECK-NEXT:    udiv x13, x12, x11
1467; CHECK-NEXT:    msub x8, x10, x8, x9
1468; CHECK-NEXT:    fmov d0, x8
1469; CHECK-NEXT:    msub x9, x13, x11, x12
1470; CHECK-NEXT:    mov v0.d[1], x9
1471; CHECK-NEXT:    ret
1472	%tmp3 = urem <2 x i64> %A, %B;
1473	ret <2 x i64> %tmp3
1474}
1475
; Floating-point remainder of <2 x float>. The expected output makes one
; fmodf call per lane (lane 1 first, then lane 0). Both operand vectors are
; spilled to the stack before the first call and reloaded for the second,
; and the lane-1 result is spilled across the second call before being
; merged into v0.s[1]. x30 is saved and restored around the calls.
1476define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
1477; CHECK-LABEL: frem2f32:
1478; CHECK:       // %bb.0:
1479; CHECK-NEXT:    sub sp, sp, #64
1480; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1481; CHECK-NEXT:    .cfi_def_cfa_offset 64
1482; CHECK-NEXT:    .cfi_offset w30, -16
1483; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
1484; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1485; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
1486; CHECK-NEXT:    mov s0, v0.s[1]
1487; CHECK-NEXT:    mov s1, v1.s[1]
1488; CHECK-NEXT:    bl fmodf
1489; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1490; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1491; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
1492; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
1493; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
1494; CHECK-NEXT:    bl fmodf
1495; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1496; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1497; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1498; CHECK-NEXT:    mov v0.s[1], v1.s[0]
1499; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1500; CHECK-NEXT:    add sp, sp, #64
1501; CHECK-NEXT:    ret
1502	%tmp3 = frem <2 x float> %A, %B;
1503	ret <2 x float> %tmp3
1504}
1505
; Floating-point remainder of <4 x float>. The expected output makes four
; fmodf calls, one per lane (order: lane 1, 0, 2, 3). The operand vectors
; stay spilled at [sp, #16] and are reloaded before each call, while the
; partially built result vector is kept in the slot at [sp] between calls.
1506define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
1507; CHECK-LABEL: frem4f32:
1508; CHECK:       // %bb.0:
1509; CHECK-NEXT:    sub sp, sp, #64
1510; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1511; CHECK-NEXT:    .cfi_def_cfa_offset 64
1512; CHECK-NEXT:    .cfi_offset w30, -16
1513; CHECK-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
1514; CHECK-NEXT:    mov s0, v0.s[1]
1515; CHECK-NEXT:    mov s1, v1.s[1]
1516; CHECK-NEXT:    bl fmodf
1517; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1518; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
1519; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1520; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
1521; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
1522; CHECK-NEXT:    bl fmodf
1523; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1524; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1525; CHECK-NEXT:    mov v0.s[1], v1.s[0]
1526; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
1527; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1528; CHECK-NEXT:    mov s0, v0.s[2]
1529; CHECK-NEXT:    mov s1, v1.s[2]
1530; CHECK-NEXT:    bl fmodf
1531; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1532; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1533; CHECK-NEXT:    mov v1.s[2], v0.s[0]
1534; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
1535; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
1536; CHECK-NEXT:    mov s0, v0.s[3]
1537; CHECK-NEXT:    mov s1, v1.s[3]
1538; CHECK-NEXT:    bl fmodf
1539; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
1540; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
1541; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1542; CHECK-NEXT:    mov v1.s[3], v0.s[0]
1543; CHECK-NEXT:    mov v0.16b, v1.16b
1544; CHECK-NEXT:    add sp, sp, #64
1545; CHECK-NEXT:    ret
1546	%tmp3 = frem <4 x float> %A, %B;
1547	ret <4 x float> %tmp3
1548}
1549
; Floating-point remainder of <1 x double>. The expected output is a single
; fmod call with the operands left where they arrived; only x30 is saved and
; restored around the call.
1550define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) {
1551; CHECK-LABEL: frem1d64:
1552; CHECK:       // %bb.0:
1553; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
1554; CHECK-NEXT:    .cfi_def_cfa_offset 16
1555; CHECK-NEXT:    .cfi_offset w30, -16
1556; CHECK-NEXT:    bl fmod
1557; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
1558; CHECK-NEXT:    ret
1559	%tmp3 = frem <1 x double> %A, %B;
1560	ret <1 x double> %tmp3
1561}
1562
; Floating-point remainder of <2 x double>. The expected output makes one
; fmod call per lane (lane 1 first, then lane 0). Both operand vectors are
; spilled before the first call and reloaded for the second, and the lane-1
; result is spilled across the second call before being merged into
; v0.d[1].
1563define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
1564; CHECK-LABEL: frem2d64:
1565; CHECK:       // %bb.0:
1566; CHECK-NEXT:    sub sp, sp, #64
1567; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
1568; CHECK-NEXT:    .cfi_def_cfa_offset 64
1569; CHECK-NEXT:    .cfi_offset w30, -16
1570; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
1571; CHECK-NEXT:    mov d0, v0.d[1]
1572; CHECK-NEXT:    mov d1, v1.d[1]
1573; CHECK-NEXT:    bl fmod
1574; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1575; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
1576; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
1577; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
1578; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
1579; CHECK-NEXT:    bl fmod
1580; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
1581; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
1582; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
1583; CHECK-NEXT:    mov v0.d[1], v1.d[0]
1584; CHECK-NEXT:    add sp, sp, #64
1585; CHECK-NEXT:    ret
1586	%tmp3 = frem <2 x double> %A, %B;
1587	ret <2 x double> %tmp3
1588}
1589
1590declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>)
1591declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>)
1592
; The llvm.aarch64.neon.pmul.v8i8 intrinsic is expected to select to a
; single pmul instruction on the 8b arrangement.
1593define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) {
1594; CHECK-LABEL: poly_mulv8i8:
1595; CHECK:       // %bb.0:
1596; CHECK-NEXT:    pmul v0.8b, v0.8b, v1.8b
1597; CHECK-NEXT:    ret
1598   %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
1599   ret <8 x i8> %prod
1600}
1601
; The llvm.aarch64.neon.pmul.v16i8 intrinsic is expected to select to a
; single pmul instruction on the 16b arrangement.
1602define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) {
1603; CHECK-LABEL: poly_mulv16i8:
1604; CHECK:       // %bb.0:
1605; CHECK-NEXT:    pmul v0.16b, v0.16b, v1.16b
1606; CHECK-NEXT:    ret
1607   %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
1608   ret <16 x i8> %prod
1609}
1610
1611declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>)
1612declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>)
1613declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>)
1614declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>)
1615
; The llvm.aarch64.neon.sqdmulh.v4i16 intrinsic is expected to select to a
; single sqdmulh instruction on the 4h arrangement.
1616define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
1617; CHECK-LABEL: test_sqdmulh_v4i16:
1618; CHECK:       // %bb.0:
1619; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.4h
1620; CHECK-NEXT:    ret
1621   %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
1622   ret <4 x i16> %prod
1623}
1624
; The llvm.aarch64.neon.sqdmulh.v8i16 intrinsic is expected to select to a
; single sqdmulh instruction on the 8h arrangement.
1625define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
1626; CHECK-LABEL: test_sqdmulh_v8i16:
1627; CHECK:       // %bb.0:
1628; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.8h
1629; CHECK-NEXT:    ret
1630   %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
1631   ret <8 x i16> %prod
1632}
1633
; The llvm.aarch64.neon.sqdmulh.v2i32 intrinsic is expected to select to a
; single sqdmulh instruction on the 2s arrangement.
1634define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
1635; CHECK-LABEL: test_sqdmulh_v2i32:
1636; CHECK:       // %bb.0:
1637; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.2s
1638; CHECK-NEXT:    ret
1639   %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
1640   ret <2 x i32> %prod
1641}
1642
; The llvm.aarch64.neon.sqdmulh.v4i32 intrinsic is expected to select to a
; single sqdmulh instruction on the 4s arrangement.
1643define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
1644; CHECK-LABEL: test_sqdmulh_v4i32:
1645; CHECK:       // %bb.0:
1646; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.4s
1647; CHECK-NEXT:    ret
1648   %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
1649   ret <4 x i32> %prod
1650}
1651
1652declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
1653declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
1654declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
1655declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
1656
; The llvm.aarch64.neon.sqrdmulh.v4i16 intrinsic is expected to select to a
; single sqrdmulh instruction on the 4h arrangement.
1657define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
1658; CHECK-LABEL: test_sqrdmulh_v4i16:
1659; CHECK:       // %bb.0:
1660; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.4h
1661; CHECK-NEXT:    ret
1662   %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
1663   ret <4 x i16> %prod
1664}
1665
; The llvm.aarch64.neon.sqrdmulh.v8i16 intrinsic is expected to select to a
; single sqrdmulh instruction on the 8h arrangement.
1666define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
1667; CHECK-LABEL: test_sqrdmulh_v8i16:
1668; CHECK:       // %bb.0:
1669; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.8h
1670; CHECK-NEXT:    ret
1671   %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
1672   ret <8 x i16> %prod
1673}
1674
; The llvm.aarch64.neon.sqrdmulh.v2i32 intrinsic is expected to select to a
; single sqrdmulh instruction on the 2s arrangement.
1675define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
1676; CHECK-LABEL: test_sqrdmulh_v2i32:
1677; CHECK:       // %bb.0:
1678; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.2s
1679; CHECK-NEXT:    ret
1680   %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
1681   ret <2 x i32> %prod
1682}
1683
; The llvm.aarch64.neon.sqrdmulh.v4i32 intrinsic is expected to select to a
; single sqrdmulh instruction on the 4s arrangement.
1684define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
1685; CHECK-LABEL: test_sqrdmulh_v4i32:
1686; CHECK:       // %bb.0:
1687; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.4s
1688; CHECK-NEXT:    ret
1689   %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
1690   ret <4 x i32> %prod
1691}
1692
1693declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
1694declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
1695declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
1696
; The llvm.aarch64.neon.fmulx.v2f32 intrinsic is expected to select to a
; single fmulx instruction on the 2s arrangement.
1697define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
1698; CHECK-LABEL: fmulx_v2f32:
1699; CHECK:       // %bb.0:
1700; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.2s
1701; CHECK-NEXT:    ret
1702; Using registers other than v0 and v1 is possible, but would be odd.
1703        %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
1704        ret <2 x float> %val
1705}
1706
; The llvm.aarch64.neon.fmulx.v4f32 intrinsic is expected to select to a
; single fmulx instruction on the 4s arrangement.
1707define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
1708; CHECK-LABEL: fmulx_v4f32:
1709; CHECK:       // %bb.0:
1710; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.4s
1711; CHECK-NEXT:    ret
1712; Using registers other than v0 and v1 is possible, but would be odd.
1713        %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
1714        ret <4 x float> %val
1715}
1716
; The llvm.aarch64.neon.fmulx.v2f64 intrinsic is expected to select to a
; single fmulx instruction on the 2d arrangement.
1717define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
1718; CHECK-LABEL: fmulx_v2f64:
1719; CHECK:       // %bb.0:
1720; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.2d
1721; CHECK-NEXT:    ret
1722; Using registers other than v0 and v1 is possible, but would be odd.
1723        %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
1724        ret <2 x double> %val
1725}
1726