xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-or.ll (revision 13c1e7a8aadbbe796051cb35ff41acff89d2395c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
8; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
10
11;;
12;; Vectors of type i32
13;;
; OR-reduces the <2 x i32> argument %a with llvm.vector.reduce.or.v2i32 and
; returns the i32 result.
; The autogenerated checks expect the same instruction shape for all four
; llc configurations: one xxspltw, one xxlor, then a word extract into r3.
; The little-endian configurations expect xxspltw word index 2 with vextuwrx;
; the big-endian configurations expect word index 1 with vextuwlx.
14define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
15; PWR9LE-LABEL: v2i32:
16; PWR9LE:       # %bb.0: # %entry
17; PWR9LE-NEXT:    xxspltw vs0, v2, 2
18; PWR9LE-NEXT:    li r3, 0
19; PWR9LE-NEXT:    xxlor v2, v2, vs0
20; PWR9LE-NEXT:    vextuwrx r3, r3, v2
21; PWR9LE-NEXT:    blr
22;
23; PWR9BE-LABEL: v2i32:
24; PWR9BE:       # %bb.0: # %entry
25; PWR9BE-NEXT:    xxspltw vs0, v2, 1
26; PWR9BE-NEXT:    li r3, 0
27; PWR9BE-NEXT:    xxlor v2, v2, vs0
28; PWR9BE-NEXT:    vextuwlx r3, r3, v2
29; PWR9BE-NEXT:    blr
30;
31; PWR10LE-LABEL: v2i32:
32; PWR10LE:       # %bb.0: # %entry
33; PWR10LE-NEXT:    xxspltw vs0, v2, 2
34; PWR10LE-NEXT:    li r3, 0
35; PWR10LE-NEXT:    xxlor v2, v2, vs0
36; PWR10LE-NEXT:    vextuwrx r3, r3, v2
37; PWR10LE-NEXT:    blr
38;
39; PWR10BE-LABEL: v2i32:
40; PWR10BE:       # %bb.0: # %entry
41; PWR10BE-NEXT:    xxspltw vs0, v2, 1
42; PWR10BE-NEXT:    li r3, 0
43; PWR10BE-NEXT:    xxlor v2, v2, vs0
44; PWR10BE-NEXT:    vextuwlx r3, r3, v2
45; PWR10BE-NEXT:    blr
46entry:
47  %0 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
48  ret i32 %0
49}
50
; OR-reduces the <4 x i32> argument %a with llvm.vector.reduce.or.v4i32 and
; returns the i32 result.
; Expected shape: xxswapd + xxlor, then xxspltw + a final OR, then a word
; extract into r3 (vextuwrx on little-endian, vextuwlx on big-endian).
; The pwr9 configurations expect xxlor for the final OR; the pwr10
; configurations expect `xxeval v2, v2, v3, vs0, 127` in its place.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
51define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
52; PWR9LE-LABEL: v4i32:
53; PWR9LE:       # %bb.0: # %entry
54; PWR9LE-NEXT:    xxswapd v3, v2
55; PWR9LE-NEXT:    li r3, 0
56; PWR9LE-NEXT:    xxlor vs0, v2, v3
57; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
58; PWR9LE-NEXT:    xxlor v2, vs0, vs1
59; PWR9LE-NEXT:    vextuwrx r3, r3, v2
60; PWR9LE-NEXT:    blr
61;
62; PWR9BE-LABEL: v4i32:
63; PWR9BE:       # %bb.0: # %entry
64; PWR9BE-NEXT:    xxswapd v3, v2
65; PWR9BE-NEXT:    li r3, 0
66; PWR9BE-NEXT:    xxlor vs0, v2, v3
67; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
68; PWR9BE-NEXT:    xxlor v2, vs0, vs1
69; PWR9BE-NEXT:    vextuwlx r3, r3, v2
70; PWR9BE-NEXT:    blr
71;
72; PWR10LE-LABEL: v4i32:
73; PWR10LE:       # %bb.0: # %entry
74; PWR10LE-NEXT:    xxswapd v3, v2
75; PWR10LE-NEXT:    li r3, 0
76; PWR10LE-NEXT:    xxlor vs0, v2, v3
77; PWR10LE-NEXT:    xxspltw vs0, vs0, 2
78; PWR10LE-NEXT:    xxeval v2, v2, v3, vs0, 127
79; PWR10LE-NEXT:    vextuwrx r3, r3, v2
80; PWR10LE-NEXT:    blr
81;
82; PWR10BE-LABEL: v4i32:
83; PWR10BE:       # %bb.0: # %entry
84; PWR10BE-NEXT:    xxswapd v3, v2
85; PWR10BE-NEXT:    li r3, 0
86; PWR10BE-NEXT:    xxlor vs0, v2, v3
87; PWR10BE-NEXT:    xxspltw vs0, vs0, 1
88; PWR10BE-NEXT:    xxeval v2, v2, v3, vs0, 127
89; PWR10BE-NEXT:    vextuwlx r3, r3, v2
90; PWR10BE-NEXT:    blr
91entry:
92  %0 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
93  ret i32 %0
94}
95
; OR-reduces the <8 x i32> argument %a with llvm.vector.reduce.or.v8i32 and
; returns the i32 result.
; Every configuration is expected to start with `xxlor vs0, v2, v3`, then
; perform an xxswapd step and an xxspltw step, and finish with a word extract
; into r3 (vextuwrx on little-endian, vextuwlx on big-endian).
; The pwr9 configurations expect xxlor after each of those two steps; the
; pwr10 configurations expect `xxeval ..., 127` in both places.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
96define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
97; PWR9LE-LABEL: v8i32:
98; PWR9LE:       # %bb.0: # %entry
99; PWR9LE-NEXT:    xxlor vs0, v2, v3
100; PWR9LE-NEXT:    li r3, 0
101; PWR9LE-NEXT:    xxswapd v2, vs0
102; PWR9LE-NEXT:    xxlor vs0, vs0, v2
103; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
104; PWR9LE-NEXT:    xxlor v2, vs0, vs1
105; PWR9LE-NEXT:    vextuwrx r3, r3, v2
106; PWR9LE-NEXT:    blr
107;
108; PWR9BE-LABEL: v8i32:
109; PWR9BE:       # %bb.0: # %entry
110; PWR9BE-NEXT:    xxlor vs0, v2, v3
111; PWR9BE-NEXT:    li r3, 0
112; PWR9BE-NEXT:    xxswapd v2, vs0
113; PWR9BE-NEXT:    xxlor vs0, vs0, v2
114; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
115; PWR9BE-NEXT:    xxlor v2, vs0, vs1
116; PWR9BE-NEXT:    vextuwlx r3, r3, v2
117; PWR9BE-NEXT:    blr
118;
119; PWR10LE-LABEL: v8i32:
120; PWR10LE:       # %bb.0: # %entry
121; PWR10LE-NEXT:    xxlor vs0, v2, v3
122; PWR10LE-NEXT:    li r3, 0
123; PWR10LE-NEXT:    xxswapd v4, vs0
124; PWR10LE-NEXT:    xxeval vs1, v2, v3, v4, 127
125; PWR10LE-NEXT:    xxspltw vs1, vs1, 2
126; PWR10LE-NEXT:    xxeval v2, vs0, v4, vs1, 127
127; PWR10LE-NEXT:    vextuwrx r3, r3, v2
128; PWR10LE-NEXT:    blr
129;
130; PWR10BE-LABEL: v8i32:
131; PWR10BE:       # %bb.0: # %entry
132; PWR10BE-NEXT:    xxlor vs0, v2, v3
133; PWR10BE-NEXT:    li r3, 0
134; PWR10BE-NEXT:    xxswapd v4, vs0
135; PWR10BE-NEXT:    xxeval vs1, v2, v3, v4, 127
136; PWR10BE-NEXT:    xxspltw vs1, vs1, 1
137; PWR10BE-NEXT:    xxeval v2, vs0, v4, vs1, 127
138; PWR10BE-NEXT:    vextuwlx r3, r3, v2
139; PWR10BE-NEXT:    blr
140entry:
141  %0 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %a)
142  ret i32 %0
143}
144
; OR-reduces the <16 x i32> argument %a with llvm.vector.reduce.or.v16i32 and
; returns the i32 result.
; The checks read the input from v2..v5. The pwr9 configurations expect three
; xxlor to combine those registers, then xxswapd + xxlor, xxspltw + xxlor, and
; a word extract into r3 (vextuwrx on little-endian, vextuwlx on big-endian).
; The pwr10 configurations expect two xxlor followed by three
; `xxeval ..., 127` interleaved with the same xxswapd and xxspltw steps.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
145define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
146; PWR9LE-LABEL: v16i32:
147; PWR9LE:       # %bb.0: # %entry
148; PWR9LE-NEXT:    xxlor vs0, v3, v5
149; PWR9LE-NEXT:    xxlor vs1, v2, v4
150; PWR9LE-NEXT:    li r3, 0
151; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
152; PWR9LE-NEXT:    xxswapd v2, vs0
153; PWR9LE-NEXT:    xxlor vs0, vs0, v2
154; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
155; PWR9LE-NEXT:    xxlor v2, vs0, vs1
156; PWR9LE-NEXT:    vextuwrx r3, r3, v2
157; PWR9LE-NEXT:    blr
158;
159; PWR9BE-LABEL: v16i32:
160; PWR9BE:       # %bb.0: # %entry
161; PWR9BE-NEXT:    xxlor vs0, v3, v5
162; PWR9BE-NEXT:    xxlor vs1, v2, v4
163; PWR9BE-NEXT:    li r3, 0
164; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
165; PWR9BE-NEXT:    xxswapd v2, vs0
166; PWR9BE-NEXT:    xxlor vs0, vs0, v2
167; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
168; PWR9BE-NEXT:    xxlor v2, vs0, vs1
169; PWR9BE-NEXT:    vextuwlx r3, r3, v2
170; PWR9BE-NEXT:    blr
171;
172; PWR10LE-LABEL: v16i32:
173; PWR10LE:       # %bb.0: # %entry
174; PWR10LE-NEXT:    xxlor vs1, v2, v4
175; PWR10LE-NEXT:    xxlor vs0, v3, v5
176; PWR10LE-NEXT:    li r3, 0
177; PWR10LE-NEXT:    xxeval vs2, vs1, v3, v5, 127
178; PWR10LE-NEXT:    xxswapd v2, vs2
179; PWR10LE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
180; PWR10LE-NEXT:    xxspltw vs0, vs0, 2
181; PWR10LE-NEXT:    xxeval v2, vs2, v2, vs0, 127
182; PWR10LE-NEXT:    vextuwrx r3, r3, v2
183; PWR10LE-NEXT:    blr
184;
185; PWR10BE-LABEL: v16i32:
186; PWR10BE:       # %bb.0: # %entry
187; PWR10BE-NEXT:    xxlor vs1, v2, v4
188; PWR10BE-NEXT:    xxlor vs0, v3, v5
189; PWR10BE-NEXT:    li r3, 0
190; PWR10BE-NEXT:    xxeval vs2, vs1, v3, v5, 127
191; PWR10BE-NEXT:    xxswapd v2, vs2
192; PWR10BE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
193; PWR10BE-NEXT:    xxspltw vs0, vs0, 1
194; PWR10BE-NEXT:    xxeval v2, vs2, v2, vs0, 127
195; PWR10BE-NEXT:    vextuwlx r3, r3, v2
196; PWR10BE-NEXT:    blr
197entry:
198  %0 = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> %a)
199  ret i32 %0
200}
201
; Declarations of the llvm.vector.reduce.or intrinsics called by the i32 test
; functions above, one per vector width (2, 4, 8 and 16 elements).
202declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) #0
203declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) #0
204declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) #0
205declare i32 @llvm.vector.reduce.or.v16i32(<16 x i32>) #0
206
207;;
208;; Vectors of type i64
209;;
; OR-reduces the <2 x i64> argument %a with llvm.vector.reduce.or.v2i64 and
; returns the i64 result.
; All four llc configurations expect xxswapd + xxlor followed by a single move
; into r3: `mfvsrld r3, vs0` on little-endian and `mffprd r3, f0` on
; big-endian. The pwr9 and pwr10 expectations are identical here.
210define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
211; PWR9LE-LABEL: v2i64:
212; PWR9LE:       # %bb.0: # %entry
213; PWR9LE-NEXT:    xxswapd v3, v2
214; PWR9LE-NEXT:    xxlor vs0, v2, v3
215; PWR9LE-NEXT:    mfvsrld r3, vs0
216; PWR9LE-NEXT:    blr
217;
218; PWR9BE-LABEL: v2i64:
219; PWR9BE:       # %bb.0: # %entry
220; PWR9BE-NEXT:    xxswapd v3, v2
221; PWR9BE-NEXT:    xxlor vs0, v2, v3
222; PWR9BE-NEXT:    mffprd r3, f0
223; PWR9BE-NEXT:    blr
224;
225; PWR10LE-LABEL: v2i64:
226; PWR10LE:       # %bb.0: # %entry
227; PWR10LE-NEXT:    xxswapd v3, v2
228; PWR10LE-NEXT:    xxlor vs0, v2, v3
229; PWR10LE-NEXT:    mfvsrld r3, vs0
230; PWR10LE-NEXT:    blr
231;
232; PWR10BE-LABEL: v2i64:
233; PWR10BE:       # %bb.0: # %entry
234; PWR10BE-NEXT:    xxswapd v3, v2
235; PWR10BE-NEXT:    xxlor vs0, v2, v3
236; PWR10BE-NEXT:    mffprd r3, f0
237; PWR10BE-NEXT:    blr
238entry:
239  %0 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
240  ret i64 %0
241}
242
; OR-reduces the <4 x i64> argument %a with llvm.vector.reduce.or.v4i64 and
; returns the i64 result.
; Every configuration is expected to start with `xxlor vs0, v2, v3` and an
; xxswapd, and to end with a move into r3 (mfvsrld on little-endian, mffprd on
; big-endian). The OR after the swap is expected as xxlor on pwr9 and as
; `xxeval vs0, v2, v3, v4, 127` on pwr10.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
243define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
244; PWR9LE-LABEL: v4i64:
245; PWR9LE:       # %bb.0: # %entry
246; PWR9LE-NEXT:    xxlor vs0, v2, v3
247; PWR9LE-NEXT:    xxswapd v2, vs0
248; PWR9LE-NEXT:    xxlor vs0, vs0, v2
249; PWR9LE-NEXT:    mfvsrld r3, vs0
250; PWR9LE-NEXT:    blr
251;
252; PWR9BE-LABEL: v4i64:
253; PWR9BE:       # %bb.0: # %entry
254; PWR9BE-NEXT:    xxlor vs0, v2, v3
255; PWR9BE-NEXT:    xxswapd v2, vs0
256; PWR9BE-NEXT:    xxlor vs0, vs0, v2
257; PWR9BE-NEXT:    mffprd r3, f0
258; PWR9BE-NEXT:    blr
259;
260; PWR10LE-LABEL: v4i64:
261; PWR10LE:       # %bb.0: # %entry
262; PWR10LE-NEXT:    xxlor vs0, v2, v3
263; PWR10LE-NEXT:    xxswapd v4, vs0
264; PWR10LE-NEXT:    xxeval vs0, v2, v3, v4, 127
265; PWR10LE-NEXT:    mfvsrld r3, vs0
266; PWR10LE-NEXT:    blr
267;
268; PWR10BE-LABEL: v4i64:
269; PWR10BE:       # %bb.0: # %entry
270; PWR10BE-NEXT:    xxlor vs0, v2, v3
271; PWR10BE-NEXT:    xxswapd v4, vs0
272; PWR10BE-NEXT:    xxeval vs0, v2, v3, v4, 127
273; PWR10BE-NEXT:    mffprd r3, f0
274; PWR10BE-NEXT:    blr
275entry:
276  %0 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %a)
277  ret i64 %0
278}
279
; OR-reduces the <8 x i64> argument %a with llvm.vector.reduce.or.v8i64 and
; returns the i64 result.
; The checks read the input from v2..v5. The pwr9 configurations expect three
; xxlor, an xxswapd, one more xxlor, then a move into r3. The pwr10
; configurations expect two xxlor, then `xxeval ..., 127` before and after the
; xxswapd. The final move is mfvsrld on little-endian and mffprd on big-endian.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
280define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
281; PWR9LE-LABEL: v8i64:
282; PWR9LE:       # %bb.0: # %entry
283; PWR9LE-NEXT:    xxlor vs0, v3, v5
284; PWR9LE-NEXT:    xxlor vs1, v2, v4
285; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
286; PWR9LE-NEXT:    xxswapd v2, vs0
287; PWR9LE-NEXT:    xxlor vs0, vs0, v2
288; PWR9LE-NEXT:    mfvsrld r3, vs0
289; PWR9LE-NEXT:    blr
290;
291; PWR9BE-LABEL: v8i64:
292; PWR9BE:       # %bb.0: # %entry
293; PWR9BE-NEXT:    xxlor vs0, v3, v5
294; PWR9BE-NEXT:    xxlor vs1, v2, v4
295; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
296; PWR9BE-NEXT:    xxswapd v2, vs0
297; PWR9BE-NEXT:    xxlor vs0, vs0, v2
298; PWR9BE-NEXT:    mffprd r3, f0
299; PWR9BE-NEXT:    blr
300;
301; PWR10LE-LABEL: v8i64:
302; PWR10LE:       # %bb.0: # %entry
303; PWR10LE-NEXT:    xxlor vs1, v2, v4
304; PWR10LE-NEXT:    xxlor vs0, v3, v5
305; PWR10LE-NEXT:    xxeval vs2, vs1, v3, v5, 127
306; PWR10LE-NEXT:    xxswapd v2, vs2
307; PWR10LE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
308; PWR10LE-NEXT:    mfvsrld r3, vs0
309; PWR10LE-NEXT:    blr
310;
311; PWR10BE-LABEL: v8i64:
312; PWR10BE:       # %bb.0: # %entry
313; PWR10BE-NEXT:    xxlor vs1, v2, v4
314; PWR10BE-NEXT:    xxlor vs0, v3, v5
315; PWR10BE-NEXT:    xxeval vs2, vs1, v3, v5, 127
316; PWR10BE-NEXT:    xxswapd v2, vs2
317; PWR10BE-NEXT:    xxeval vs0, vs1, vs0, v2, 127
318; PWR10BE-NEXT:    mffprd r3, f0
319; PWR10BE-NEXT:    blr
320entry:
321  %0 = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)
322  ret i64 %0
323}
324
; OR-reduces the <16 x i64> argument %a with llvm.vector.reduce.or.v16i64 and
; returns the i64 result.
; The checks read the input from v2..v9. The pwr9 configurations expect seven
; xxlor to combine those registers, an xxswapd, one more xxlor, then a move
; into r3. The pwr10 configurations expect three xxlor and three
; `xxeval ..., 127` before the xxswapd, and one more xxeval after it. The final
; move is mfvsrld on little-endian and mffprd on big-endian.
; NOTE(review): immediate 127 is presumably xxeval's three-input OR -- confirm
; against the Power ISA.
325define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
326; PWR9LE-LABEL: v16i64:
327; PWR9LE:       # %bb.0: # %entry
328; PWR9LE-NEXT:    xxlor vs0, v4, v8
329; PWR9LE-NEXT:    xxlor vs1, v2, v6
330; PWR9LE-NEXT:    xxlor vs2, v5, v9
331; PWR9LE-NEXT:    xxlor vs3, v3, v7
332; PWR9LE-NEXT:    xxlor vs2, vs3, vs2
333; PWR9LE-NEXT:    xxlor vs0, vs1, vs0
334; PWR9LE-NEXT:    xxlor vs0, vs0, vs2
335; PWR9LE-NEXT:    xxswapd v2, vs0
336; PWR9LE-NEXT:    xxlor vs0, vs0, v2
337; PWR9LE-NEXT:    mfvsrld r3, vs0
338; PWR9LE-NEXT:    blr
339;
340; PWR9BE-LABEL: v16i64:
341; PWR9BE:       # %bb.0: # %entry
342; PWR9BE-NEXT:    xxlor vs0, v4, v8
343; PWR9BE-NEXT:    xxlor vs1, v2, v6
344; PWR9BE-NEXT:    xxlor vs2, v5, v9
345; PWR9BE-NEXT:    xxlor vs3, v3, v7
346; PWR9BE-NEXT:    xxlor vs2, vs3, vs2
347; PWR9BE-NEXT:    xxlor vs0, vs1, vs0
348; PWR9BE-NEXT:    xxlor vs0, vs0, vs2
349; PWR9BE-NEXT:    xxswapd v2, vs0
350; PWR9BE-NEXT:    xxlor vs0, vs0, v2
351; PWR9BE-NEXT:    mffprd r3, f0
352; PWR9BE-NEXT:    blr
353;
354; PWR10LE-LABEL: v16i64:
355; PWR10LE:       # %bb.0: # %entry
356; PWR10LE-NEXT:    xxlor vs1, v2, v6
357; PWR10LE-NEXT:    xxlor vs0, v5, v9
358; PWR10LE-NEXT:    xxlor vs2, v3, v7
359; PWR10LE-NEXT:    xxeval vs1, vs1, v4, v8, 127
360; PWR10LE-NEXT:    xxeval vs3, vs2, v5, v9, 127
361; PWR10LE-NEXT:    xxeval vs0, vs1, vs2, vs0, 127
362; PWR10LE-NEXT:    xxswapd v2, vs0
363; PWR10LE-NEXT:    xxeval vs0, vs1, vs3, v2, 127
364; PWR10LE-NEXT:    mfvsrld r3, vs0
365; PWR10LE-NEXT:    blr
366;
367; PWR10BE-LABEL: v16i64:
368; PWR10BE:       # %bb.0: # %entry
369; PWR10BE-NEXT:    xxlor vs1, v2, v6
370; PWR10BE-NEXT:    xxlor vs0, v5, v9
371; PWR10BE-NEXT:    xxlor vs2, v3, v7
372; PWR10BE-NEXT:    xxeval vs1, vs1, v4, v8, 127
373; PWR10BE-NEXT:    xxeval vs3, vs2, v5, v9, 127
374; PWR10BE-NEXT:    xxeval vs0, vs1, vs2, vs0, 127
375; PWR10BE-NEXT:    xxswapd v2, vs0
376; PWR10BE-NEXT:    xxeval vs0, vs1, vs3, v2, 127
377; PWR10BE-NEXT:    mffprd r3, f0
378; PWR10BE-NEXT:    blr
379entry:
380  %0 = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> %a)
381  ret i64 %0
382}
383
; Declarations of the llvm.vector.reduce.or intrinsics called by the i64 test
; functions above, one per vector width (2, 4, 8 and 16 elements).
384declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) #0
385declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) #0
386declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64>) #0
387declare i64 @llvm.vector.reduce.or.v16i64(<16 x i64>) #0
388
389
; Attribute group #0 (nounwind), referenced by the defines and the intrinsic
; declarations above.
390attributes #0 = { nounwind }
391