xref: /llvm-project/llvm/test/CodeGen/AArch64/mul.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Scalar i8 multiply. Both instruction selectors are expected to emit a single
; 32-bit mul on the w registers.
define i8 @i8(i8 %a, i8 %b) {
; CHECK-LABEL: i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i8 %a, %b
  ret i8 %s
}

; Scalar i16 multiply. Both instruction selectors are expected to emit a single
; 32-bit mul on the w registers.
define i16 @i16(i16 %a, i16 %b) {
; CHECK-LABEL: i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i16 %a, %b
  ret i16 %s
}

; Scalar i32 multiply: one mul on the w registers for both selectors.
define i32 @i32(i32 %a, i32 %b) {
; CHECK-LABEL: i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i32 %a, %b
  ret i32 %s
}

; Scalar i64 multiply: one mul on the x registers for both selectors.
define i64 @i64(i64 %a, i64 %b) {
; CHECK-LABEL: i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul x0, x0, x1
; CHECK-NEXT:    ret
entry:
  %s = mul i64 %a, %b
  ret i64 %s
}

; Scalar i128 multiply. The expected code builds the result from 64-bit
; mul/umulh/madd pieces; the default selector (SD prefix) and GlobalISel
; (GI prefix) produce different sequences, so each has its own assertions.
define i128 @i128(i128 %a, i128 %b) {
; CHECK-SD-LABEL: i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x8, x0, x2
; CHECK-SD-NEXT:    madd x8, x0, x3, x8
; CHECK-SD-NEXT:    mul x0, x0, x2
; CHECK-SD-NEXT:    madd x1, x1, x2, x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul x9, x0, x3
; CHECK-GI-NEXT:    mul x8, x0, x2
; CHECK-GI-NEXT:    umulh x10, x0, x2
; CHECK-GI-NEXT:    madd x9, x1, x2, x9
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    add x1, x9, x10
; CHECK-GI-NEXT:    ret
entry:
  %s = mul i128 %a, %b
  ret i128 %s
}

; <2 x i8> multiply through memory: operands are loaded from %p1 and %p2 and
; the product is stored back to %p1. Both selectors are expected to do the
; arithmetic as a .2s vector multiply and store the two result bytes separately.
define void @v2i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.b }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #1
; CHECK-SD-NEXT:    add x9, x1, #1
; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-SD-NEXT:    ld1 { v1.b }[4], [x9]
; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strb w9, [x0]
; CHECK-SD-NEXT:    strb w8, [x0, #1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
; CHECK-GI-NEXT:    ldr b2, [x0, #1]
; CHECK-GI-NEXT:    ldr b3, [x1, #1]
; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    str b0, [x0]
; CHECK-GI-NEXT:    str b1, [x0, #1]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <2 x i8>, ptr %p1
  %e = load <2 x i8>, ptr %p2
  %s = mul <2 x i8> %d, %e
  store <2 x i8> %s, ptr %p1
  ret void
}

; <3 x i8> multiply through memory (load from %p1 and %p2, store to %p1).
; Both selectors are expected to perform the arithmetic as a .4h vector
; multiply; the default selector goes through a 16-byte stack slot to store
; the low two bytes, while GlobalISel stores the three bytes individually.
define void @v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    umov w8, v0.h[2]
; CHECK-SD-NEXT:    str s1, [sp, #12]
; CHECK-SD-NEXT:    ldrh w9, [sp, #12]
; CHECK-SD-NEXT:    strb w8, [x0, #2]
; CHECK-SD-NEXT:    strh w9, [x0]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldrb w8, [x0]
; CHECK-GI-NEXT:    ldrb w9, [x1]
; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
; CHECK-GI-NEXT:    mov v0.h[1], w10
; CHECK-GI-NEXT:    mov v1.h[1], w11
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v1.h[2], w9
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    mov h1, v0.h[1]
; CHECK-GI-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NEXT:    str b0, [x0]
; CHECK-GI-NEXT:    str b1, [x0, #1]
; CHECK-GI-NEXT:    str b2, [x0, #2]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <3 x i8>, ptr %p1
  %e = load <3 x i8>, ptr %p2
  %s = mul <3 x i8> %d, %e
  store <3 x i8> %s, ptr %p1
  ret void
}

; <4 x i8> multiply through memory (load from %p1 and %p2, store to %p1).
; The default selector is expected to use a widening umull followed by xtn;
; GlobalISel rebuilds both operands lane by lane as .4h vectors, multiplies,
; and narrows with uzp1.
define void @v4i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
; CHECK-SD-NEXT:    str s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldr w8, [x0]
; CHECK-GI-NEXT:    ldr w9, [x1]
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    mov b2, v0.b[1]
; CHECK-GI-NEXT:    mov b3, v1.b[1]
; CHECK-GI-NEXT:    mov b4, v0.b[2]
; CHECK-GI-NEXT:    mov b5, v0.b[3]
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov b2, v1.b[2]
; CHECK-GI-NEXT:    fmov w9, s3
; CHECK-GI-NEXT:    mov b3, v1.b[3]
; CHECK-GI-NEXT:    mov v0.h[1], w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    fmov w8, s4
; CHECK-GI-NEXT:    fmov w9, s2
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v1.h[2], w9
; CHECK-GI-NEXT:    fmov w8, s5
; CHECK-GI-NEXT:    fmov w9, s3
; CHECK-GI-NEXT:    mov v0.h[3], w8
; CHECK-GI-NEXT:    mov v1.h[3], w9
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    str w8, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <4 x i8>, ptr %p1
  %e = load <4 x i8>, ptr %p2
  %s = mul <4 x i8> %d, %e
  store <4 x i8> %s, ptr %p1
  ret void
}

; <8 x i8> multiply: a single .8b vector mul for both selectors.
define <8 x i8> @v8i8(<8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %s = mul <8 x i8> %d, %e
  ret <8 x i8> %s
}

; <16 x i8> multiply: a single .16b vector mul for both selectors.
define <16 x i8> @v16i8(<16 x i8> %d, <16 x i8> %e) {
; CHECK-LABEL: v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %s = mul <16 x i8> %d, %e
  ret <16 x i8> %s
}

; <32 x i8> multiply: expected to be split into two .16b vector muls. The two
; selectors emit the same instructions in a different order, hence the
; separate assertion prefixes.
define <32 x i8> @v32i8(<32 x i8> %d, <32 x i8> %e) {
; CHECK-SD-LABEL: v32i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    mul v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v32i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    mul v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <32 x i8> %d, %e
  ret <32 x i8> %s
}

; <2 x i16> multiply through memory (load from %p1 and %p2, store to %p1).
; Both selectors are expected to do the arithmetic as a .2s vector multiply
; and store the two halfword results separately.
define void @v2i16(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.h }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #2
; CHECK-SD-NEXT:    add x9, x1, #2
; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-SD-NEXT:    ld1 { v1.h }[2], [x9]
; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strh w9, [x0]
; CHECK-SD-NEXT:    strh w8, [x0, #2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-GI-NEXT:    ld1 { v1.h }[0], [x1]
; CHECK-GI-NEXT:    ldr h2, [x0, #2]
; CHECK-GI-NEXT:    ldr h3, [x1, #2]
; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    str h0, [x0]
; CHECK-GI-NEXT:    str h1, [x0, #2]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <2 x i16>, ptr %p1
  %e = load <2 x i16>, ptr %p2
  %s = mul <2 x i16> %d, %e
  store <2 x i16> %s, ptr %p1
  ret void
}

; <3 x i16> multiply through memory (load from %p1 and %p2, store to %p1).
; Both selectors are expected to use a .4h vector multiply; the default
; selector loads each operand with one 64-bit ldr, while GlobalISel loads and
; stores the three lanes individually.
define void @v3i16(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v3i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldr d0, [x0]
; CHECK-SD-NEXT:    ldr d1, [x1]
; CHECK-SD-NEXT:    add x8, x0, #4
; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    st1 { v0.h }[2], [x8]
; CHECK-SD-NEXT:    str s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldr h0, [x0]
; CHECK-GI-NEXT:    ldr h1, [x1]
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    add x9, x1, #2
; CHECK-GI-NEXT:    add x10, x1, #4
; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT:    add x9, x0, #4
; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-GI-NEXT:    ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    str h0, [x0]
; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    st1 { v0.h }[2], [x9]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <3 x i16>, ptr %p1
  %e = load <3 x i16>, ptr %p2
  %s = mul <3 x i16> %d, %e
  store <3 x i16> %s, ptr %p1
  ret void
}

; <4 x i16> multiply: a single .4h vector mul for both selectors.
define <4 x i16> @v4i16(<4 x i16> %d, <4 x i16> %e) {
; CHECK-LABEL: v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %s = mul <4 x i16> %d, %e
  ret <4 x i16> %s
}

; <8 x i16> multiply: a single .8h vector mul for both selectors.
define <8 x i16> @v8i16(<8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %s = mul <8 x i16> %d, %e
  ret <8 x i16> %s
}

; <16 x i16> multiply: expected to be split into two .8h vector muls. The two
; selectors differ only in the order of the two instructions.
define <16 x i16> @v16i16(<16 x i16> %d, <16 x i16> %e) {
; CHECK-SD-LABEL: v16i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT:    mul v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v16i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <16 x i16> %d, %e
  ret <16 x i16> %s
}

; <2 x i32> multiply: a single .2s vector mul for both selectors.
define <2 x i32> @v2i32(<2 x i32> %d, <2 x i32> %e) {
; CHECK-LABEL: v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %s = mul <2 x i32> %d, %e
  ret <2 x i32> %s
}

; <3 x i32> multiply: both selectors are expected to use a full .4s vector mul.
define <3 x i32> @v3i32(<3 x i32> %d, <3 x i32> %e) {
; CHECK-LABEL: v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = mul <3 x i32> %d, %e
  ret <3 x i32> %s
}

; <4 x i32> multiply: a single .4s vector mul for both selectors.
define <4 x i32> @v4i32(<4 x i32> %d, <4 x i32> %e) {
; CHECK-LABEL: v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = mul <4 x i32> %d, %e
  ret <4 x i32> %s
}

; <8 x i32> multiply: expected to be split into two .4s vector muls. The two
; selectors differ only in the order of the two instructions.
define <8 x i32> @v8i32(<8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-LABEL: v8i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT:    mul v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v8i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mul v1.4s, v1.4s, v3.4s
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <8 x i32> %d, %e
  ret <8 x i32> %s
}

; <2 x i64> multiply. The expected code contains no vector mul: both selectors
; move each 64-bit lane to a general-purpose register, multiply with the
; scalar mul, and reassemble the result vector.
define <2 x i64> @v2i64(<2 x i64> %d, <2 x i64> %e) {
; CHECK-SD-LABEL: v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov x10, d1
; CHECK-SD-NEXT:    fmov x11, d0
; CHECK-SD-NEXT:    mov x8, v1.d[1]
; CHECK-SD-NEXT:    mov x9, v0.d[1]
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov d0, x10
; CHECK-SD-NEXT:    mov v0.d[1], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v1.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <2 x i64> %d, %e
  ret <2 x i64> %s
}

; <3 x i64> multiply. Operands arrive one element per d register (d0-d2 and
; d3-d5 in the expected code) and every product is computed with the scalar
; mul on general-purpose registers. GlobalISel first packs the low two
; elements of each operand into one q register before scalarising.
define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
; CHECK-SD-LABEL: v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmov x8, d3
; CHECK-SD-NEXT:    fmov x9, d0
; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    fmov x10, d1
; CHECK-SD-NEXT:    fmov x11, d2
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov x9, d4
; CHECK-SD-NEXT:    mul x9, x10, x9
; CHECK-SD-NEXT:    fmov x10, d5
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    fmov d1, x9
; CHECK-SD-NEXT:    fmov d2, x10
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d3
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v3.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    fmov x8, d2
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    fmov x9, d5
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    fmov d2, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <3 x i64> %d, %e
  ret <3 x i64> %s
}

; <4 x i64> multiply. As with the two-element case, the expected code contains
; no vector mul: all four lanes are moved to general-purpose registers,
; multiplied with the scalar mul, and inserted back into v0 and v1.
define <4 x i64> @v4i64(<4 x i64> %d, <4 x i64> %e) {
; CHECK-SD-LABEL: v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov x8, d2
; CHECK-SD-NEXT:    fmov x9, d0
; CHECK-SD-NEXT:    fmov x12, d1
; CHECK-SD-NEXT:    mov x10, v2.d[1]
; CHECK-SD-NEXT:    mov x11, v0.d[1]
; CHECK-SD-NEXT:    mov x13, v3.d[1]
; CHECK-SD-NEXT:    mov x14, v1.d[1]
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov x9, d3
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    mul x9, x12, x9
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    mul x11, x14, x13
; CHECK-SD-NEXT:    mov v0.d[1], x10
; CHECK-SD-NEXT:    fmov d1, x9
; CHECK-SD-NEXT:    mov v1.d[1], x11
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d2
; CHECK-GI-NEXT:    fmov x12, d3
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v2.d[1]
; CHECK-GI-NEXT:    mov x13, v1.d[1]
; CHECK-GI-NEXT:    mov x14, v3.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mul x10, x10, x11
; CHECK-GI-NEXT:    mul x9, x9, x12
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mul x11, x13, x14
; CHECK-GI-NEXT:    mov v1.d[0], x9
; CHECK-GI-NEXT:    mov v0.d[1], x10
; CHECK-GI-NEXT:    mov v1.d[1], x11
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <4 x i64> %d, %e
  ret <4 x i64> %s
}

; <2 x i128> multiply. All operands are in x0-x7 in the expected code, and each
; 128-bit element is expanded into the same mul/umulh/madd pattern used for
; the scalar i128 case; instruction scheduling differs between the selectors.
define <2 x i128> @v2i128(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x8, x2, x6
; CHECK-SD-NEXT:    umulh x9, x0, x4
; CHECK-SD-NEXT:    madd x8, x2, x7, x8
; CHECK-SD-NEXT:    madd x9, x0, x5, x9
; CHECK-SD-NEXT:    madd x3, x3, x6, x8
; CHECK-SD-NEXT:    madd x1, x1, x4, x9
; CHECK-SD-NEXT:    mul x0, x0, x4
; CHECK-SD-NEXT:    mul x2, x2, x6
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul x9, x0, x5
; CHECK-GI-NEXT:    mul x12, x2, x7
; CHECK-GI-NEXT:    mul x8, x0, x4
; CHECK-GI-NEXT:    umulh x10, x0, x4
; CHECK-GI-NEXT:    madd x11, x1, x4, x9
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    mul x9, x2, x6
; CHECK-GI-NEXT:    umulh x13, x2, x6
; CHECK-GI-NEXT:    add x1, x11, x10
; CHECK-GI-NEXT:    madd x12, x3, x6, x12
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    add x3, x12, x13
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <2 x i128> %d, %e
  ret <2 x i128> %s
}

; <3 x i128> multiply. In the expected code the first operand and the first
; element of the second operand are in x0-x7, and the remaining two elements
; of the second operand are loaded from the stack at [sp] and [sp, #16].
; Each element is expanded into 64-bit mul/umulh/madd pieces.
define <3 x i128> @v3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: v3i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x9, x0, x6
; CHECK-SD-NEXT:    ldp x8, x10, [sp]
; CHECK-SD-NEXT:    madd x9, x0, x7, x9
; CHECK-SD-NEXT:    umulh x11, x2, x8
; CHECK-SD-NEXT:    madd x1, x1, x6, x9
; CHECK-SD-NEXT:    ldp x9, x12, [sp, #16]
; CHECK-SD-NEXT:    madd x10, x2, x10, x11
; CHECK-SD-NEXT:    umulh x13, x4, x9
; CHECK-SD-NEXT:    madd x3, x3, x8, x10
; CHECK-SD-NEXT:    madd x11, x4, x12, x13
; CHECK-SD-NEXT:    mul x0, x0, x6
; CHECK-SD-NEXT:    madd x5, x5, x9, x11
; CHECK-SD-NEXT:    mul x2, x2, x8
; CHECK-SD-NEXT:    mul x4, x4, x9
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldp x10, x13, [sp]
; CHECK-GI-NEXT:    mul x9, x0, x7
; CHECK-GI-NEXT:    mul x8, x0, x6
; CHECK-GI-NEXT:    mul x13, x2, x13
; CHECK-GI-NEXT:    madd x12, x1, x6, x9
; CHECK-GI-NEXT:    mul x9, x2, x10
; CHECK-GI-NEXT:    umulh x14, x2, x10
; CHECK-GI-NEXT:    madd x10, x3, x10, x13
; CHECK-GI-NEXT:    ldp x13, x15, [sp, #16]
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    umulh x11, x0, x6
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    mul x15, x4, x15
; CHECK-GI-NEXT:    add x3, x10, x14
; CHECK-GI-NEXT:    umulh x16, x4, x13
; CHECK-GI-NEXT:    add x1, x12, x11
; CHECK-GI-NEXT:    madd x15, x5, x13, x15
; CHECK-GI-NEXT:    mul x4, x4, x13
; CHECK-GI-NEXT:    add x5, x15, x16
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <3 x i128> %d, %e
  ret <3 x i128> %s
}

; <4 x i128> multiply. In the expected code the first operand occupies x0-x7
; and the whole second operand is loaded from the stack ([sp] through
; [sp, #48]). Each element is expanded into 64-bit mul/umulh/madd pieces.
define <4 x i128> @v4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: v4i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldp x8, x9, [sp]
; CHECK-SD-NEXT:    ldp x11, x12, [sp, #16]
; CHECK-SD-NEXT:    umulh x10, x0, x8
; CHECK-SD-NEXT:    umulh x13, x2, x11
; CHECK-SD-NEXT:    madd x9, x0, x9, x10
; CHECK-SD-NEXT:    madd x10, x2, x12, x13
; CHECK-SD-NEXT:    ldp x13, x14, [sp, #48]
; CHECK-SD-NEXT:    madd x1, x1, x8, x9
; CHECK-SD-NEXT:    madd x3, x3, x11, x10
; CHECK-SD-NEXT:    ldp x9, x10, [sp, #32]
; CHECK-SD-NEXT:    umulh x15, x6, x13
; CHECK-SD-NEXT:    umulh x12, x4, x9
; CHECK-SD-NEXT:    mul x0, x0, x8
; CHECK-SD-NEXT:    madd x10, x4, x10, x12
; CHECK-SD-NEXT:    madd x12, x6, x14, x15
; CHECK-SD-NEXT:    madd x5, x5, x9, x10
; CHECK-SD-NEXT:    madd x7, x7, x13, x12
; CHECK-SD-NEXT:    mul x2, x2, x11
; CHECK-SD-NEXT:    mul x4, x4, x9
; CHECK-SD-NEXT:    mul x6, x6, x13
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldp x9, x10, [sp]
; CHECK-GI-NEXT:    ldp x15, x16, [sp, #32]
; CHECK-GI-NEXT:    mul x10, x0, x10
; CHECK-GI-NEXT:    mul x16, x4, x16
; CHECK-GI-NEXT:    madd x12, x1, x9, x10
; CHECK-GI-NEXT:    ldp x10, x13, [sp, #16]
; CHECK-GI-NEXT:    mul x8, x0, x9
; CHECK-GI-NEXT:    mul x13, x2, x13
; CHECK-GI-NEXT:    umulh x11, x0, x9
; CHECK-GI-NEXT:    mul x9, x2, x10
; CHECK-GI-NEXT:    umulh x14, x2, x10
; CHECK-GI-NEXT:    add x1, x12, x11
; CHECK-GI-NEXT:    madd x13, x3, x10, x13
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    mul x10, x4, x15
; CHECK-GI-NEXT:    umulh x17, x4, x15
; CHECK-GI-NEXT:    add x3, x13, x14
; CHECK-GI-NEXT:    madd x15, x5, x15, x16
; CHECK-GI-NEXT:    ldp x16, x18, [sp, #48]
; CHECK-GI-NEXT:    mov x4, x10
; CHECK-GI-NEXT:    mul x18, x6, x18
; CHECK-GI-NEXT:    umulh x0, x6, x16
; CHECK-GI-NEXT:    add x5, x15, x17
; CHECK-GI-NEXT:    madd x18, x7, x16, x18
; CHECK-GI-NEXT:    mul x6, x6, x16
; CHECK-GI-NEXT:    add x7, x18, x0
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <4 x i128> %d, %e
  ret <4 x i128> %s
}
