xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-mulv.ll (revision 1dd0d3cf40f21b842dbee107b3d203db9fbaa4ae)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64 -aarch64-enable-sink-fold=true -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
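; NOTE(review): the prefix on the next line is spelled with an underscore, so it
; matches neither prefix passed to FileCheck on the RUN lines and is never
; evaluated. The -global-isel expectations for mulv_v3i64 further down differ
; from the default run's, which suggests the fallback no longer happens; confirm
; that before changing the spelling to the hyphenated form.
5; CHECK_GI:        warning: Instruction selection used fallback path for mulv_v3i64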
6
; Declarations of every llvm.vector.reduce.mul overload called by the test
; functions below, ordered by element type (i8, i16, i32, i64, i128) and then
; by element count. Each declaration is used by exactly one mulv_* function.
7declare i8 @llvm.vector.reduce.mul.v2i8(<2 x i8>)
8declare i8 @llvm.vector.reduce.mul.v3i8(<3 x i8>)
9declare i8 @llvm.vector.reduce.mul.v4i8(<4 x i8>)
10declare i8 @llvm.vector.reduce.mul.v8i8(<8 x i8>)
11declare i8 @llvm.vector.reduce.mul.v16i8(<16 x i8>)
12declare i8 @llvm.vector.reduce.mul.v32i8(<32 x i8>)
13declare i16 @llvm.vector.reduce.mul.v2i16(<2 x i16>)
14declare i16 @llvm.vector.reduce.mul.v3i16(<3 x i16>)
15declare i16 @llvm.vector.reduce.mul.v4i16(<4 x i16>)
16declare i16 @llvm.vector.reduce.mul.v8i16(<8 x i16>)
17declare i16 @llvm.vector.reduce.mul.v16i16(<16 x i16>)
18declare i32 @llvm.vector.reduce.mul.v2i32(<2 x i32>)
19declare i32 @llvm.vector.reduce.mul.v3i32(<3 x i32>)
20declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)
21declare i32 @llvm.vector.reduce.mul.v8i32(<8 x i32>)
22declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
23declare i64 @llvm.vector.reduce.mul.v3i64(<3 x i64>)
24declare i64 @llvm.vector.reduce.mul.v4i64(<4 x i64>)
25declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>)
26
; Multiply-reduce a <2 x i8> vector to a scalar i8.
; Both RUN lines share one expectation: the second element is moved out of
; v0.s[1], the first out of s0, and a single scalar mul produces the result.
27define i8 @mulv_v2i8(<2 x i8> %a) {
28; CHECK-LABEL: mulv_v2i8:
29; CHECK:       // %bb.0: // %entry
30; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
31; CHECK-NEXT:    mov w8, v0.s[1]
32; CHECK-NEXT:    fmov w9, s0
33; CHECK-NEXT:    mul w0, w9, w8
34; CHECK-NEXT:    ret
35entry:
36  %arg1 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> %a)
37  ret i8 %arg1
38}
39
; Multiply-reduce a <3 x i8> vector to a scalar i8.
; Both RUN lines share one expectation: the three elements are read from
; w0, w1 and w2 and combined with two scalar muls, with no vector instructions.
40define i8 @mulv_v3i8(<3 x i8> %a) {
41; CHECK-LABEL: mulv_v3i8:
42; CHECK:       // %bb.0: // %entry
43; CHECK-NEXT:    mul w8, w0, w1
44; CHECK-NEXT:    mul w0, w8, w2
45; CHECK-NEXT:    ret
46entry:
47  %arg1 = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %a)
48  ret i8 %arg1
49}
50
; Multiply-reduce a <4 x i8> vector to a scalar i8.
; The elements are extracted from the .h lanes of v0 with umov in both runs.
; Default run: extraction is interleaved with a serial chain of three muls.
; -global-isel run: all four lanes are extracted first, then multiplied
; pairwise (two independent muls followed by a final mul).
51define i8 @mulv_v4i8(<4 x i8> %a) {
52; CHECK-SD-LABEL: mulv_v4i8:
53; CHECK-SD:       // %bb.0: // %entry
54; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
55; CHECK-SD-NEXT:    umov w8, v0.h[1]
56; CHECK-SD-NEXT:    umov w9, v0.h[0]
57; CHECK-SD-NEXT:    umov w10, v0.h[2]
58; CHECK-SD-NEXT:    mul w8, w9, w8
59; CHECK-SD-NEXT:    umov w9, v0.h[3]
60; CHECK-SD-NEXT:    mul w8, w8, w10
61; CHECK-SD-NEXT:    mul w0, w8, w9
62; CHECK-SD-NEXT:    ret
63;
64; CHECK-GI-LABEL: mulv_v4i8:
65; CHECK-GI:       // %bb.0: // %entry
66; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
67; CHECK-GI-NEXT:    umov w8, v0.h[0]
68; CHECK-GI-NEXT:    umov w9, v0.h[1]
69; CHECK-GI-NEXT:    umov w10, v0.h[2]
70; CHECK-GI-NEXT:    umov w11, v0.h[3]
71; CHECK-GI-NEXT:    mul w8, w8, w9
72; CHECK-GI-NEXT:    mul w9, w10, w11
73; CHECK-GI-NEXT:    mul w0, w8, w9
74; CHECK-GI-NEXT:    ret
75entry:
76  %arg1 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> %a)
77  ret i8 %arg1
78}
79
; Multiply-reduce a <8 x i8> vector to a scalar i8.
; No vector multiply is expected in either run; all eight byte lanes are
; extracted with umov and multiplied in general-purpose registers.
; Default run: a serial chain of seven muls interleaved with the extractions.
; -global-isel run: all lanes are extracted first, then reduced pairwise in
; three levels (4 + 2 + 1 muls).
80define i8 @mulv_v8i8(<8 x i8> %a) {
81; CHECK-SD-LABEL: mulv_v8i8:
82; CHECK-SD:       // %bb.0: // %entry
83; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
84; CHECK-SD-NEXT:    umov w8, v0.b[1]
85; CHECK-SD-NEXT:    umov w9, v0.b[0]
86; CHECK-SD-NEXT:    umov w10, v0.b[2]
87; CHECK-SD-NEXT:    mul w8, w9, w8
88; CHECK-SD-NEXT:    umov w9, v0.b[3]
89; CHECK-SD-NEXT:    mul w8, w8, w10
90; CHECK-SD-NEXT:    umov w10, v0.b[4]
91; CHECK-SD-NEXT:    mul w8, w8, w9
92; CHECK-SD-NEXT:    umov w9, v0.b[5]
93; CHECK-SD-NEXT:    mul w8, w8, w10
94; CHECK-SD-NEXT:    umov w10, v0.b[6]
95; CHECK-SD-NEXT:    mul w8, w8, w9
96; CHECK-SD-NEXT:    umov w9, v0.b[7]
97; CHECK-SD-NEXT:    mul w8, w8, w10
98; CHECK-SD-NEXT:    mul w0, w8, w9
99; CHECK-SD-NEXT:    ret
100;
101; CHECK-GI-LABEL: mulv_v8i8:
102; CHECK-GI:       // %bb.0: // %entry
103; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
104; CHECK-GI-NEXT:    umov w8, v0.b[0]
105; CHECK-GI-NEXT:    umov w9, v0.b[1]
106; CHECK-GI-NEXT:    umov w10, v0.b[2]
107; CHECK-GI-NEXT:    umov w11, v0.b[3]
108; CHECK-GI-NEXT:    umov w12, v0.b[4]
109; CHECK-GI-NEXT:    umov w13, v0.b[5]
110; CHECK-GI-NEXT:    umov w14, v0.b[6]
111; CHECK-GI-NEXT:    umov w15, v0.b[7]
112; CHECK-GI-NEXT:    mul w8, w8, w9
113; CHECK-GI-NEXT:    mul w9, w10, w11
114; CHECK-GI-NEXT:    mul w10, w12, w13
115; CHECK-GI-NEXT:    mul w11, w14, w15
116; CHECK-GI-NEXT:    mul w8, w8, w9
117; CHECK-GI-NEXT:    mul w9, w10, w11
118; CHECK-GI-NEXT:    mul w0, w8, w9
119; CHECK-GI-NEXT:    ret
120entry:
121  %arg1 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> %a)
122  ret i8 %arg1
123}
124
; Multiply-reduce a <16 x i8> vector to a scalar i8.
; Both runs first multiply the upper 64 bits of v0 into the lower 64 bits with
; one 8b vector mul, then reduce the remaining eight byte lanes in scalar code.
; The upper half is obtained with ext #8 in the default run and with
; mov d1, v0.d[1] in the -global-isel run. The scalar tail is a serial chain
; in the default run and a pairwise tree in the -global-isel run.
125define i8 @mulv_v16i8(<16 x i8> %a) {
126; CHECK-SD-LABEL: mulv_v16i8:
127; CHECK-SD:       // %bb.0: // %entry
128; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
129; CHECK-SD-NEXT:    mul v0.8b, v0.8b, v1.8b
130; CHECK-SD-NEXT:    umov w8, v0.b[1]
131; CHECK-SD-NEXT:    umov w9, v0.b[0]
132; CHECK-SD-NEXT:    umov w10, v0.b[2]
133; CHECK-SD-NEXT:    mul w8, w9, w8
134; CHECK-SD-NEXT:    umov w9, v0.b[3]
135; CHECK-SD-NEXT:    mul w8, w8, w10
136; CHECK-SD-NEXT:    umov w10, v0.b[4]
137; CHECK-SD-NEXT:    mul w8, w8, w9
138; CHECK-SD-NEXT:    umov w9, v0.b[5]
139; CHECK-SD-NEXT:    mul w8, w8, w10
140; CHECK-SD-NEXT:    umov w10, v0.b[6]
141; CHECK-SD-NEXT:    mul w8, w8, w9
142; CHECK-SD-NEXT:    umov w9, v0.b[7]
143; CHECK-SD-NEXT:    mul w8, w8, w10
144; CHECK-SD-NEXT:    mul w0, w8, w9
145; CHECK-SD-NEXT:    ret
146;
147; CHECK-GI-LABEL: mulv_v16i8:
148; CHECK-GI:       // %bb.0: // %entry
149; CHECK-GI-NEXT:    mov d1, v0.d[1]
150; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v1.8b
151; CHECK-GI-NEXT:    umov w8, v0.b[0]
152; CHECK-GI-NEXT:    umov w9, v0.b[1]
153; CHECK-GI-NEXT:    umov w10, v0.b[2]
154; CHECK-GI-NEXT:    umov w11, v0.b[3]
155; CHECK-GI-NEXT:    umov w12, v0.b[4]
156; CHECK-GI-NEXT:    umov w13, v0.b[5]
157; CHECK-GI-NEXT:    umov w14, v0.b[6]
158; CHECK-GI-NEXT:    umov w15, v0.b[7]
159; CHECK-GI-NEXT:    mul w8, w8, w9
160; CHECK-GI-NEXT:    mul w9, w10, w11
161; CHECK-GI-NEXT:    mul w10, w12, w13
162; CHECK-GI-NEXT:    mul w11, w14, w15
163; CHECK-GI-NEXT:    mul w8, w8, w9
164; CHECK-GI-NEXT:    mul w9, w10, w11
165; CHECK-GI-NEXT:    mul w0, w8, w9
166; CHECK-GI-NEXT:    ret
167entry:
168  %arg1 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %a)
169  ret i8 %arg1
170}
171
; Multiply-reduce a <32 x i8> vector (passed in v0 and v1) to a scalar i8.
; Default run: v0 and v1 are multiplied as full 16b vectors, the result is
; halved once more with ext #8 plus an 8b mul, and the last eight lanes are
; reduced with a serial chain of scalar muls.
; -global-isel run: each input register is first folded onto its own lower
; half (two 8b muls), the two halves are multiplied together, and the last
; eight lanes are reduced pairwise in scalar code.
172define i8 @mulv_v32i8(<32 x i8> %a) {
173; CHECK-SD-LABEL: mulv_v32i8:
174; CHECK-SD:       // %bb.0: // %entry
175; CHECK-SD-NEXT:    mul v0.16b, v0.16b, v1.16b
176; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
177; CHECK-SD-NEXT:    mul v0.8b, v0.8b, v1.8b
178; CHECK-SD-NEXT:    umov w8, v0.b[1]
179; CHECK-SD-NEXT:    umov w9, v0.b[0]
180; CHECK-SD-NEXT:    umov w10, v0.b[2]
181; CHECK-SD-NEXT:    mul w8, w9, w8
182; CHECK-SD-NEXT:    umov w9, v0.b[3]
183; CHECK-SD-NEXT:    mul w8, w8, w10
184; CHECK-SD-NEXT:    umov w10, v0.b[4]
185; CHECK-SD-NEXT:    mul w8, w8, w9
186; CHECK-SD-NEXT:    umov w9, v0.b[5]
187; CHECK-SD-NEXT:    mul w8, w8, w10
188; CHECK-SD-NEXT:    umov w10, v0.b[6]
189; CHECK-SD-NEXT:    mul w8, w8, w9
190; CHECK-SD-NEXT:    umov w9, v0.b[7]
191; CHECK-SD-NEXT:    mul w8, w8, w10
192; CHECK-SD-NEXT:    mul w0, w8, w9
193; CHECK-SD-NEXT:    ret
194;
195; CHECK-GI-LABEL: mulv_v32i8:
196; CHECK-GI:       // %bb.0: // %entry
197; CHECK-GI-NEXT:    mov d2, v0.d[1]
198; CHECK-GI-NEXT:    mov d3, v1.d[1]
199; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v2.8b
200; CHECK-GI-NEXT:    mul v1.8b, v1.8b, v3.8b
201; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v1.8b
202; CHECK-GI-NEXT:    umov w8, v0.b[0]
203; CHECK-GI-NEXT:    umov w9, v0.b[1]
204; CHECK-GI-NEXT:    umov w10, v0.b[2]
205; CHECK-GI-NEXT:    umov w11, v0.b[3]
206; CHECK-GI-NEXT:    umov w12, v0.b[4]
207; CHECK-GI-NEXT:    umov w13, v0.b[5]
208; CHECK-GI-NEXT:    umov w14, v0.b[6]
209; CHECK-GI-NEXT:    umov w15, v0.b[7]
210; CHECK-GI-NEXT:    mul w8, w8, w9
211; CHECK-GI-NEXT:    mul w9, w10, w11
212; CHECK-GI-NEXT:    mul w10, w12, w13
213; CHECK-GI-NEXT:    mul w11, w14, w15
214; CHECK-GI-NEXT:    mul w8, w8, w9
215; CHECK-GI-NEXT:    mul w9, w10, w11
216; CHECK-GI-NEXT:    mul w0, w8, w9
217; CHECK-GI-NEXT:    ret
218entry:
219  %arg1 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> %a)
220  ret i8 %arg1
221}
222
; Multiply-reduce a <2 x i16> vector to a scalar i16.
; Both RUN lines share one expectation, identical to the <2 x i8> case: the
; elements are taken from v0.s[1] and s0 and multiplied with one scalar mul.
223define i16 @mulv_v2i16(<2 x i16> %a) {
224; CHECK-LABEL: mulv_v2i16:
225; CHECK:       // %bb.0: // %entry
226; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
227; CHECK-NEXT:    mov w8, v0.s[1]
228; CHECK-NEXT:    fmov w9, s0
229; CHECK-NEXT:    mul w0, w9, w8
230; CHECK-NEXT:    ret
231entry:
232  %arg1 = call i16 @llvm.vector.reduce.mul.v2i16(<2 x i16> %a)
233  ret i16 %arg1
234}
235
; Multiply-reduce a <3 x i16> vector to a scalar i16.
; Both runs extract lanes h[0]..h[2] of v0 with umov and use two scalar muls;
; they differ only in the order of the first two extractions and in the
; operand order of the first mul.
236define i16 @mulv_v3i16(<3 x i16> %a) {
237; CHECK-SD-LABEL: mulv_v3i16:
238; CHECK-SD:       // %bb.0: // %entry
239; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
240; CHECK-SD-NEXT:    umov w8, v0.h[1]
241; CHECK-SD-NEXT:    umov w9, v0.h[0]
242; CHECK-SD-NEXT:    umov w10, v0.h[2]
243; CHECK-SD-NEXT:    mul w8, w9, w8
244; CHECK-SD-NEXT:    mul w0, w8, w10
245; CHECK-SD-NEXT:    ret
246;
247; CHECK-GI-LABEL: mulv_v3i16:
248; CHECK-GI:       // %bb.0: // %entry
249; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
250; CHECK-GI-NEXT:    umov w8, v0.h[0]
251; CHECK-GI-NEXT:    umov w9, v0.h[1]
252; CHECK-GI-NEXT:    umov w10, v0.h[2]
253; CHECK-GI-NEXT:    mul w8, w8, w9
254; CHECK-GI-NEXT:    mul w0, w8, w10
255; CHECK-GI-NEXT:    ret
256entry:
257  %arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a)
258  ret i16 %arg1
259}
260
; Multiply-reduce a <4 x i16> vector to a scalar i16.
; All four .h lanes of v0 are extracted with umov in both runs.
; Default run: a serial chain of three muls interleaved with the extractions.
; -global-isel run: extract everything first, then two independent muls and a
; final combining mul.
261define i16 @mulv_v4i16(<4 x i16> %a) {
262; CHECK-SD-LABEL: mulv_v4i16:
263; CHECK-SD:       // %bb.0: // %entry
264; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
265; CHECK-SD-NEXT:    umov w8, v0.h[1]
266; CHECK-SD-NEXT:    umov w9, v0.h[0]
267; CHECK-SD-NEXT:    umov w10, v0.h[2]
268; CHECK-SD-NEXT:    mul w8, w9, w8
269; CHECK-SD-NEXT:    umov w9, v0.h[3]
270; CHECK-SD-NEXT:    mul w8, w8, w10
271; CHECK-SD-NEXT:    mul w0, w8, w9
272; CHECK-SD-NEXT:    ret
273;
274; CHECK-GI-LABEL: mulv_v4i16:
275; CHECK-GI:       // %bb.0: // %entry
276; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
277; CHECK-GI-NEXT:    umov w8, v0.h[0]
278; CHECK-GI-NEXT:    umov w9, v0.h[1]
279; CHECK-GI-NEXT:    umov w10, v0.h[2]
280; CHECK-GI-NEXT:    umov w11, v0.h[3]
281; CHECK-GI-NEXT:    mul w8, w8, w9
282; CHECK-GI-NEXT:    mul w9, w10, w11
283; CHECK-GI-NEXT:    mul w0, w8, w9
284; CHECK-GI-NEXT:    ret
285entry:
286  %arg1 = call i16 @llvm.vector.reduce.mul.v4i16(<4 x i16> %a)
287  ret i16 %arg1
288}
289
; Multiply-reduce a <8 x i16> vector to a scalar i16.
; Both runs fold the upper 64 bits of v0 onto the lower 64 bits with one 4h
; vector mul (upper half via ext #8 in the default run, via mov d1, v0.d[1]
; in the -global-isel run) and then reduce the four remaining lanes in scalar
; code: serially in the default run, pairwise in the -global-isel run.
290define i16 @mulv_v8i16(<8 x i16> %a) {
291; CHECK-SD-LABEL: mulv_v8i16:
292; CHECK-SD:       // %bb.0: // %entry
293; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
294; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
295; CHECK-SD-NEXT:    umov w8, v0.h[1]
296; CHECK-SD-NEXT:    umov w9, v0.h[0]
297; CHECK-SD-NEXT:    umov w10, v0.h[2]
298; CHECK-SD-NEXT:    mul w8, w9, w8
299; CHECK-SD-NEXT:    umov w9, v0.h[3]
300; CHECK-SD-NEXT:    mul w8, w8, w10
301; CHECK-SD-NEXT:    mul w0, w8, w9
302; CHECK-SD-NEXT:    ret
303;
304; CHECK-GI-LABEL: mulv_v8i16:
305; CHECK-GI:       // %bb.0: // %entry
306; CHECK-GI-NEXT:    mov d1, v0.d[1]
307; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
308; CHECK-GI-NEXT:    umov w8, v0.h[0]
309; CHECK-GI-NEXT:    umov w9, v0.h[1]
310; CHECK-GI-NEXT:    umov w10, v0.h[2]
311; CHECK-GI-NEXT:    umov w11, v0.h[3]
312; CHECK-GI-NEXT:    mul w8, w8, w9
313; CHECK-GI-NEXT:    mul w9, w10, w11
314; CHECK-GI-NEXT:    mul w0, w8, w9
315; CHECK-GI-NEXT:    ret
316entry:
317  %arg1 = call i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %a)
318  ret i16 %arg1
319}
320
; Multiply-reduce a <16 x i16> vector (passed in v0 and v1) to a scalar i16.
; Default run: one full-width 8h mul of v0 by v1, one more halving step
; (ext #8 plus a 4h mul), then a serial scalar chain over four lanes.
; -global-isel run: each input register is folded onto its own lower half
; (two 4h muls), the halves are multiplied together, and the four remaining
; lanes are reduced pairwise in scalar code.
321define i16 @mulv_v16i16(<16 x i16> %a) {
322; CHECK-SD-LABEL: mulv_v16i16:
323; CHECK-SD:       // %bb.0: // %entry
324; CHECK-SD-NEXT:    mul v0.8h, v0.8h, v1.8h
325; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
326; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
327; CHECK-SD-NEXT:    umov w8, v0.h[1]
328; CHECK-SD-NEXT:    umov w9, v0.h[0]
329; CHECK-SD-NEXT:    umov w10, v0.h[2]
330; CHECK-SD-NEXT:    mul w8, w9, w8
331; CHECK-SD-NEXT:    umov w9, v0.h[3]
332; CHECK-SD-NEXT:    mul w8, w8, w10
333; CHECK-SD-NEXT:    mul w0, w8, w9
334; CHECK-SD-NEXT:    ret
335;
336; CHECK-GI-LABEL: mulv_v16i16:
337; CHECK-GI:       // %bb.0: // %entry
338; CHECK-GI-NEXT:    mov d2, v0.d[1]
339; CHECK-GI-NEXT:    mov d3, v1.d[1]
340; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v2.4h
341; CHECK-GI-NEXT:    mul v1.4h, v1.4h, v3.4h
342; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
343; CHECK-GI-NEXT:    umov w8, v0.h[0]
344; CHECK-GI-NEXT:    umov w9, v0.h[1]
345; CHECK-GI-NEXT:    umov w10, v0.h[2]
346; CHECK-GI-NEXT:    umov w11, v0.h[3]
347; CHECK-GI-NEXT:    mul w8, w8, w9
348; CHECK-GI-NEXT:    mul w9, w10, w11
349; CHECK-GI-NEXT:    mul w0, w8, w9
350; CHECK-GI-NEXT:    ret
351entry:
352  %arg1 = call i16 @llvm.vector.reduce.mul.v16i16(<16 x i16> %a)
353  ret i16 %arg1
354}
355
; Multiply-reduce a <2 x i32> vector to a scalar i32.
; Both RUN lines share one expectation: lane 1 is moved out of v0.s[1], lane 0
; out of s0, and one scalar mul produces the result.
356define i32 @mulv_v2i32(<2 x i32> %a) {
357; CHECK-LABEL: mulv_v2i32:
358; CHECK:       // %bb.0: // %entry
359; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
360; CHECK-NEXT:    mov w8, v0.s[1]
361; CHECK-NEXT:    fmov w9, s0
362; CHECK-NEXT:    mul w0, w9, w8
363; CHECK-NEXT:    ret
364entry:
365  %arg1 = call i32 @llvm.vector.reduce.mul.v3i32(<2 x i32> %a)
366  ret i32 %arg1
367}
368
; Multiply-reduce a <3 x i32> vector to a scalar i32.
; Both RUN lines share one expectation: a copy of the input has the constant 1
; (the multiplicative identity) inserted into lane 3, its upper half is
; rotated down with ext #8 and multiplied into the lower half of v0 as a 2s
; vector, and the two remaining lanes are multiplied in scalar code.
369define i32 @mulv_v3i32(<3 x i32> %a) {
370; CHECK-LABEL: mulv_v3i32:
371; CHECK:       // %bb.0: // %entry
372; CHECK-NEXT:    mov v1.16b, v0.16b
373; CHECK-NEXT:    mov w8, #1 // =0x1
374; CHECK-NEXT:    mov v1.s[3], w8
375; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
376; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
377; CHECK-NEXT:    mov w8, v0.s[1]
378; CHECK-NEXT:    fmov w9, s0
379; CHECK-NEXT:    mul w0, w9, w8
380; CHECK-NEXT:    ret
381entry:
382  %arg1 = call i32 @llvm.vector.reduce.mul.v3i32(<3 x i32> %a)
383  ret i32 %arg1
384}
385
; Multiply-reduce a <4 x i32> vector to a scalar i32.
; Both runs fold the upper half of v0 onto the lower half with one 2s vector
; mul and finish with a single scalar mul of lanes 0 and 1. The only
; difference is how the upper half is obtained: ext #8 in the default run,
; mov d1, v0.d[1] in the -global-isel run.
386define i32 @mulv_v4i32(<4 x i32> %a) {
387; CHECK-SD-LABEL: mulv_v4i32:
388; CHECK-SD:       // %bb.0: // %entry
389; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
390; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
391; CHECK-SD-NEXT:    mov w8, v0.s[1]
392; CHECK-SD-NEXT:    fmov w9, s0
393; CHECK-SD-NEXT:    mul w0, w9, w8
394; CHECK-SD-NEXT:    ret
395;
396; CHECK-GI-LABEL: mulv_v4i32:
397; CHECK-GI:       // %bb.0: // %entry
398; CHECK-GI-NEXT:    mov d1, v0.d[1]
399; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
400; CHECK-GI-NEXT:    mov w8, v0.s[1]
401; CHECK-GI-NEXT:    fmov w9, s0
402; CHECK-GI-NEXT:    mul w0, w9, w8
403; CHECK-GI-NEXT:    ret
404entry:
405  %arg1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
406  ret i32 %arg1
407}
408
; Multiply-reduce a <8 x i32> vector (passed in v0 and v1) to a scalar i32.
; Default run: one full-width 4s mul of v0 by v1, then ext #8 plus a 2s mul,
; then one scalar mul of the two remaining lanes.
; -global-isel run: each input register is folded onto its own lower half
; (two 2s muls), the halves are multiplied together, then one scalar mul.
409define i32 @mulv_v8i32(<8 x i32> %a) {
410; CHECK-SD-LABEL: mulv_v8i32:
411; CHECK-SD:       // %bb.0: // %entry
412; CHECK-SD-NEXT:    mul v0.4s, v0.4s, v1.4s
413; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
414; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
415; CHECK-SD-NEXT:    mov w8, v0.s[1]
416; CHECK-SD-NEXT:    fmov w9, s0
417; CHECK-SD-NEXT:    mul w0, w9, w8
418; CHECK-SD-NEXT:    ret
419;
420; CHECK-GI-LABEL: mulv_v8i32:
421; CHECK-GI:       // %bb.0: // %entry
422; CHECK-GI-NEXT:    mov d2, v0.d[1]
423; CHECK-GI-NEXT:    mov d3, v1.d[1]
424; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v2.2s
425; CHECK-GI-NEXT:    mul v1.2s, v1.2s, v3.2s
426; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
427; CHECK-GI-NEXT:    mov w8, v0.s[1]
428; CHECK-GI-NEXT:    fmov w9, s0
429; CHECK-GI-NEXT:    mul w0, w9, w8
430; CHECK-GI-NEXT:    ret
431entry:
432  %arg1 = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %a)
433  ret i32 %arg1
434}
435
; Multiply-reduce a <2 x i64> vector to a scalar i64.
; Both RUN lines share one expectation: the two 64-bit lanes are moved to x8
; and x9 and multiplied with one scalar mul; no vector multiply is used.
436define i64 @mulv_v2i64(<2 x i64> %a) {
437; CHECK-LABEL: mulv_v2i64:
438; CHECK:       // %bb.0: // %entry
439; CHECK-NEXT:    mov x8, v0.d[1]
440; CHECK-NEXT:    fmov x9, d0
441; CHECK-NEXT:    mul x0, x9, x8
442; CHECK-NEXT:    ret
443entry:
444  %arg1 = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a)
445  ret i64 %arg1
446}
447
; Multiply-reduce a <3 x i64> vector to a scalar i64.
; The three elements are read from d0, d1 and d2 and combined with two scalar
; muls in both runs. The default run multiplies d0 by d2 first and then by d1;
; the -global-isel run multiplies d0 by d1 first and then by d2, and has no
; kill annotations in its expected output.
448define i64 @mulv_v3i64(<3 x i64> %a) {
449; CHECK-SD-LABEL: mulv_v3i64:
450; CHECK-SD:       // %bb.0: // %entry
451; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
452; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
453; CHECK-SD-NEXT:    fmov x8, d2
454; CHECK-SD-NEXT:    fmov x9, d0
455; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
456; CHECK-SD-NEXT:    mul x8, x9, x8
457; CHECK-SD-NEXT:    fmov x9, d1
458; CHECK-SD-NEXT:    mul x0, x9, x8
459; CHECK-SD-NEXT:    ret
460;
461; CHECK-GI-LABEL: mulv_v3i64:
462; CHECK-GI:       // %bb.0: // %entry
463; CHECK-GI-NEXT:    fmov x8, d0
464; CHECK-GI-NEXT:    fmov x9, d1
465; CHECK-GI-NEXT:    mul x8, x8, x9
466; CHECK-GI-NEXT:    fmov x9, d2
467; CHECK-GI-NEXT:    mul x0, x8, x9
468; CHECK-GI-NEXT:    ret
469entry:
470  %arg1 = call i64 @llvm.vector.reduce.mul.v3i64(<3 x i64> %a)
471  ret i64 %arg1
472}
473
; Multiply-reduce a <4 x i64> vector (passed in v0 and v1) to a scalar i64.
; Both runs move all four 64-bit lanes to general-purpose registers and use
; three scalar muls arranged pairwise; they differ in the pairing.
; Default run: (v0.d[1] * v1.d[1]) and (v0.d[0] * v1.d[0]).
; -global-isel run: (v0.d[0] * v0.d[1]) and (v1.d[0] * v1.d[1]).
474define i64 @mulv_v4i64(<4 x i64> %a) {
475; CHECK-SD-LABEL: mulv_v4i64:
476; CHECK-SD:       // %bb.0: // %entry
477; CHECK-SD-NEXT:    mov x8, v1.d[1]
478; CHECK-SD-NEXT:    mov x9, v0.d[1]
479; CHECK-SD-NEXT:    fmov x10, d0
480; CHECK-SD-NEXT:    mul x8, x9, x8
481; CHECK-SD-NEXT:    fmov x9, d1
482; CHECK-SD-NEXT:    mul x9, x10, x9
483; CHECK-SD-NEXT:    mul x0, x9, x8
484; CHECK-SD-NEXT:    ret
485;
486; CHECK-GI-LABEL: mulv_v4i64:
487; CHECK-GI:       // %bb.0: // %entry
488; CHECK-GI-NEXT:    mov x8, v0.d[1]
489; CHECK-GI-NEXT:    fmov x10, d0
490; CHECK-GI-NEXT:    mov x9, v1.d[1]
491; CHECK-GI-NEXT:    mul x8, x10, x8
492; CHECK-GI-NEXT:    fmov x10, d1
493; CHECK-GI-NEXT:    mul x9, x10, x9
494; CHECK-GI-NEXT:    mul x0, x8, x9
495; CHECK-GI-NEXT:    ret
496entry:
497  %arg1 = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> %a)
498  ret i64 %arg1
499}
500
; Multiply-reduce a <2 x i128> vector to a scalar i128.
; The expected code reads the operands from x0..x3 and writes the result to
; x0 and x1, using only general-purpose instructions. x0 receives
; mul(x0, x2); x1 receives umulh(x0, x2) plus the cross products x0*x3 and
; x1*x2.
; Default run: umulh, two madds and one mul.
; -global-isel run: two muls, umulh, one madd, a register move into x0 and a
; final add.
501define i128 @mulv_v2i128(<2 x i128> %a) {
502; CHECK-SD-LABEL: mulv_v2i128:
503; CHECK-SD:       // %bb.0: // %entry
504; CHECK-SD-NEXT:    umulh x8, x0, x2
505; CHECK-SD-NEXT:    madd x8, x0, x3, x8
506; CHECK-SD-NEXT:    mul x0, x0, x2
507; CHECK-SD-NEXT:    madd x1, x1, x2, x8
508; CHECK-SD-NEXT:    ret
509;
510; CHECK-GI-LABEL: mulv_v2i128:
511; CHECK-GI:       // %bb.0: // %entry
512; CHECK-GI-NEXT:    mul x9, x0, x3
513; CHECK-GI-NEXT:    mul x8, x0, x2
514; CHECK-GI-NEXT:    umulh x10, x0, x2
515; CHECK-GI-NEXT:    madd x9, x1, x2, x9
516; CHECK-GI-NEXT:    mov x0, x8
517; CHECK-GI-NEXT:    add x1, x9, x10
518; CHECK-GI-NEXT:    ret
519entry:
520  %arg1 = call i128 @llvm.vector.reduce.mul.v2i128(<2 x i128> %a)
521  ret i128 %arg1
522}
523