xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll (revision 889215a30ed60474e573f9632d1fa362dfa1b04e)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64--                 -S | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -passes=slp-vectorizer -mtriple=x86_64-- -mattr=avx512vl -S | FileCheck %s --check-prefixes=CHECK,AVX
4
; External function taking an i1. The tests below call it to give an
; intermediate compare or select result an extra use outside the reduction.
5declare void @use1(i1)
6
; Baseline logical-and reduction of integer compares.
; Each of the four lanes of %x is compared `slt 0`, and the results are chained
; with `select a, b, false` (the poison-safe form of `and`).
; Expected output: one vector icmp, a freeze of the <4 x i1> result, and a
; single llvm.vector.reduce.and.v4i1 call.
7define i1 @logical_and_icmp(<4 x i32> %x) {
8; CHECK-LABEL: @logical_and_icmp(
9; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer
10; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
11; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
12; CHECK-NEXT:    ret i1 [[TMP3]]
13;
14  %x0 = extractelement <4 x i32> %x, i32 0
15  %x1 = extractelement <4 x i32> %x, i32 1
16  %x2 = extractelement <4 x i32> %x, i32 2
17  %x3 = extractelement <4 x i32> %x, i32 3
18  %c0 = icmp slt i32 %x0, 0
19  %c1 = icmp slt i32 %x1, 0
20  %c2 = icmp slt i32 %x2, 0
21  %c3 = icmp slt i32 %x3, 0
22  %s1 = select i1 %c0, i1 %c1, i1 false
23  %s2 = select i1 %s1, i1 %c2, i1 false
24  %s3 = select i1 %s2, i1 %c3, i1 false
25  ret i1 %s3
26}
27
; Baseline logical-or reduction of integer compares with two vector operands.
; Lane i of %x is compared `slt` against lane i of %y, and the results are
; chained with `select a, true, b` (the poison-safe form of `or`).
; Expected output: one vector icmp of %x against %y, a freeze, and a single
; llvm.vector.reduce.or.v4i1 call.
28define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) {
29; CHECK-LABEL: @logical_or_icmp(
30; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
31; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
32; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
33; CHECK-NEXT:    ret i1 [[TMP3]]
34;
35  %x0 = extractelement <4 x i32> %x, i32 0
36  %x1 = extractelement <4 x i32> %x, i32 1
37  %x2 = extractelement <4 x i32> %x, i32 2
38  %x3 = extractelement <4 x i32> %x, i32 3
39  %y0 = extractelement <4 x i32> %y, i32 0
40  %y1 = extractelement <4 x i32> %y, i32 1
41  %y2 = extractelement <4 x i32> %y, i32 2
42  %y3 = extractelement <4 x i32> %y, i32 3
43  %c0 = icmp slt i32 %x0, %y0
44  %c1 = icmp slt i32 %x1, %y1
45  %c2 = icmp slt i32 %x2, %y2
46  %c3 = icmp slt i32 %x3, %y3
47  %s1 = select i1 %c0, i1 true, i1 %c1
48  %s2 = select i1 %s1, i1 true, i1 %c2
49  %s3 = select i1 %s2, i1 true, i1 %c3
50  ret i1 %s3
51}
52
; Floating-point variant of the logical-and reduction.
; Each lane of %x is compared `fcmp olt 0.0`, and the results are chained with
; `select a, b, false`.
; Expected output: one vector fcmp, a freeze, and llvm.vector.reduce.and.v4i1.
53define i1 @logical_and_fcmp(<4 x float> %x) {
54; CHECK-LABEL: @logical_and_fcmp(
55; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
56; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
57; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
58; CHECK-NEXT:    ret i1 [[TMP3]]
59;
60  %x0 = extractelement <4 x float> %x, i32 0
61  %x1 = extractelement <4 x float> %x, i32 1
62  %x2 = extractelement <4 x float> %x, i32 2
63  %x3 = extractelement <4 x float> %x, i32 3
64  %c0 = fcmp olt float %x0, 0.0
65  %c1 = fcmp olt float %x1, 0.0
66  %c2 = fcmp olt float %x2, 0.0
67  %c3 = fcmp olt float %x3, 0.0
68  %s1 = select i1 %c0, i1 %c1, i1 false
69  %s2 = select i1 %s1, i1 %c2, i1 false
70  %s3 = select i1 %s2, i1 %c3, i1 false
71  ret i1 %s3
72}
73
; Floating-point variant of the logical-or reduction.
; Each lane of %x is compared `fcmp olt 0.0`, and the results are chained with
; `select a, true, b`.
; Expected output: one vector fcmp, a freeze, and llvm.vector.reduce.or.v4i1.
74define i1 @logical_or_fcmp(<4 x float> %x) {
75; CHECK-LABEL: @logical_or_fcmp(
76; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer
77; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
78; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
79; CHECK-NEXT:    ret i1 [[TMP3]]
80;
81  %x0 = extractelement <4 x float> %x, i32 0
82  %x1 = extractelement <4 x float> %x, i32 1
83  %x2 = extractelement <4 x float> %x, i32 2
84  %x3 = extractelement <4 x float> %x, i32 3
85  %c0 = fcmp olt float %x0, 0.0
86  %c1 = fcmp olt float %x1, 0.0
87  %c2 = fcmp olt float %x2, 0.0
88  %c3 = fcmp olt float %x3, 0.0
89  %s1 = select i1 %c0, i1 true, i1 %c1
90  %s2 = select i1 %s1, i1 true, i1 %c2
91  %s3 = select i1 %s2, i1 true, i1 %c3
92  ret i1 %s3
93}
94
; Logical-and reduction where the compare predicate differs per lane
; (lane 0 `ult`, lane 1 `slt`, lane 2 `sgt`, lane 3 `slt`, all against 0).
; The two run configurations expect different results:
;  - SSE prefix (no -mattr): operands are rearranged into two shuffled vectors
;    (the `sgt` lane has its operands swapped so it can share the `slt`
;    compare), both an `slt` and a `ult` vector compare are emitted, their
;    lanes are blended, and the blend is frozen and reduced with
;    llvm.vector.reduce.and.v4i1.
;  - AVX prefix (-mattr=avx512vl): the scalar code is expected unchanged.
95define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
96; SSE-LABEL: @logical_and_icmp_diff_preds(
97; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
98; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
99; SSE-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
100; SSE-NEXT:    [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
101; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
102; SSE-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
103; SSE-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
104; SSE-NEXT:    ret i1 [[TMP7]]
105;
106; AVX-LABEL: @logical_and_icmp_diff_preds(
107; AVX-NEXT:    [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
108; AVX-NEXT:    [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1
109; AVX-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
110; AVX-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3
111; AVX-NEXT:    [[C0:%.*]] = icmp ult i32 [[X0]], 0
112; AVX-NEXT:    [[C1:%.*]] = icmp slt i32 [[X1]], 0
113; AVX-NEXT:    [[C2:%.*]] = icmp sgt i32 [[X2]], 0
114; AVX-NEXT:    [[C3:%.*]] = icmp slt i32 [[X3]], 0
115; AVX-NEXT:    [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false
116; AVX-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
117; AVX-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false
118; AVX-NEXT:    ret i1 [[S3]]
119;
120  %x0 = extractelement <4 x i32> %x, i32 0
121  %x1 = extractelement <4 x i32> %x, i32 1
122  %x2 = extractelement <4 x i32> %x, i32 2
123  %x3 = extractelement <4 x i32> %x, i32 3
124  %c0 = icmp ult i32 %x0, 0
125  %c1 = icmp slt i32 %x1, 0
126  %c2 = icmp sgt i32 %x2, 0
127  %c3 = icmp slt i32 %x3, 0
128  %s1 = select i1 %c0, i1 %c1, i1 false
129  %s2 = select i1 %s1, i1 %c2, i1 false
130  %s3 = select i1 %s2, i1 %c3, i1 false
131  ret i1 %s3
132}
133
; Logical-and reduction where every lane uses the same predicate (`sgt`) but a
; different constant operand (0, 1, 2, 3).
; Expected output: one vector icmp against the constant vector <0, 1, 2, 3>,
; a freeze, and llvm.vector.reduce.and.v4i1.
134define i1 @logical_and_icmp_diff_const(<4 x i32> %x) {
135; CHECK-LABEL: @logical_and_icmp_diff_const(
136; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3>
137; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
138; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
139; CHECK-NEXT:    ret i1 [[TMP3]]
140;
141  %x0 = extractelement <4 x i32> %x, i32 0
142  %x1 = extractelement <4 x i32> %x, i32 1
143  %x2 = extractelement <4 x i32> %x, i32 2
144  %x3 = extractelement <4 x i32> %x, i32 3
145  %c0 = icmp sgt i32 %x0, 0
146  %c1 = icmp sgt i32 %x1, 1
147  %c2 = icmp sgt i32 %x2, 2
148  %c3 = icmp sgt i32 %x3, 3
149  %s1 = select i1 %c0, i1 %c1, i1 false
150  %s2 = select i1 %s1, i1 %c2, i1 false
151  %s3 = select i1 %s2, i1 %c3, i1 false
152  ret i1 %s3
153}
154
; The select chain mixes operations: logical-and (%s1), then logical-or (%s2),
; then logical-and (%s3), so it is not a single and/or reduction.
; Expected output: the four `sgt 0` compares become one vector icmp, but the
; select chain stays scalar, fed by extractelement of each compare lane. No
; reduce intrinsic is expected.
155define i1 @mixed_logical_icmp(<4 x i32> %x) {
156; CHECK-LABEL: @mixed_logical_icmp(
157; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
158; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
159; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
160; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false
161; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
162; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[TMP4]]
163; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
164; CHECK-NEXT:    [[S3:%.*]] = select i1 [[S2]], i1 [[TMP5]], i1 false
165; CHECK-NEXT:    ret i1 [[S3]]
166;
167  %x0 = extractelement <4 x i32> %x, i32 0
168  %x1 = extractelement <4 x i32> %x, i32 1
169  %x2 = extractelement <4 x i32> %x, i32 2
170  %x3 = extractelement <4 x i32> %x, i32 3
171  %c0 = icmp sgt i32 %x0, 0
172  %c1 = icmp sgt i32 %x1, 0
173  %c2 = icmp sgt i32 %x2, 0
174  %c3 = icmp sgt i32 %x3, 0
175  %s1 = select i1 %c0, i1 %c1, i1 false
176  %s2 = select i1 %s1, i1 true, i1 %c2
177  %s3 = select i1 %s2, i1 %c3, i1 false
178  ret i1 %s3
179}
180
; Logical-and chain over only three of the four lanes of %x (lane 3 is unused).
; Expected output: lanes 0 and 1 are compared as a <2 x i32> vector, lane 2 is
; compared as a scalar, and the select chain stays scalar. No reduce intrinsic
; is expected.
181define i1 @logical_and_icmp_subvec(<4 x i32> %x) {
182; CHECK-LABEL: @logical_and_icmp_subvec(
183; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
184; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
185; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
186; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[X2]], 0
187; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
188; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
189; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
190; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
191; CHECK-NEXT:    ret i1 [[S2]]
192;
193  %x0 = extractelement <4 x i32> %x, i32 0
194  %x1 = extractelement <4 x i32> %x, i32 1
195  %x2 = extractelement <4 x i32> %x, i32 2
196  %c0 = icmp slt i32 %x0, 0
197  %c1 = icmp slt i32 %x1, 0
198  %c2 = icmp slt i32 %x2, 0
199  %s1 = select i1 %c0, i1 %c1, i1 false
200  %s2 = select i1 %s1, i1 %c2, i1 false
201  %s3 = ret i1 %s2
202}
203
204; TODO: This is better than all-scalar and still safe,
205;       but we want this to be 2 reductions with glue
206;       logic...or a wide reduction?
207
; Range ("clamp") test: every lane of %x is checked with both `slt 42` and
; `sgt 17`, and all eight conditions are chained with logical-and.
; Expected output: %x is duplicated into an 8-wide vector, compared with both
; `sgt` and `slt` against <17 x 4, 42 x 4>, the two results are blended (lanes
; 0-3 from `sgt`, lanes 4-7 from `slt`), and the blend is frozen and reduced
; with a single llvm.vector.reduce.and.v8i1.
; NOTE(review): the TODO comment preceding this function asks for "2 reductions
; with glue logic...or a wide reduction"; the expected output below already is
; one 8-wide reduction - confirm whether that TODO is still current.
208define i1 @logical_and_icmp_clamp(<4 x i32> %x) {
209; CHECK-LABEL: @logical_and_icmp_clamp(
210; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
211; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
212; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
213; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
214; CHECK-NEXT:    [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
215; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
216; CHECK-NEXT:    ret i1 [[TMP6]]
217;
218  %x0 = extractelement <4 x i32> %x, i32 0
219  %x1 = extractelement <4 x i32> %x, i32 1
220  %x2 = extractelement <4 x i32> %x, i32 2
221  %x3 = extractelement <4 x i32> %x, i32 3
222  %c0 = icmp slt i32 %x0, 42
223  %c1 = icmp slt i32 %x1, 42
224  %c2 = icmp slt i32 %x2, 42
225  %c3 = icmp slt i32 %x3, 42
226  %d0 = icmp sgt i32 %x0, 17
227  %d1 = icmp sgt i32 %x1, 17
228  %d2 = icmp sgt i32 %x2, 17
229  %d3 = icmp sgt i32 %x3, 17
230  %s1 = select i1 %c0, i1 %c1, i1 false
231  %s2 = select i1 %s1, i1 %c2, i1 false
232  %s3 = select i1 %s2, i1 %c3, i1 false
233  %s4 = select i1 %s3, i1 %d0, i1 false
234  %s5 = select i1 %s4, i1 %d1, i1 false
235  %s6 = select i1 %s5, i1 %d2, i1 false
236  %s7 = select i1 %s6, i1 %d3, i1 false
237  ret i1 %s7
238}
239
; Same clamp pattern as @logical_and_icmp_clamp, but one compare (%c2, lane 2
; `slt 42`) has an extra use: it is passed to @use1.
; Expected output: the 8-wide compare/blend/reduce is still formed, and the
; extra use is served by extracting lane 6 of the blended vector (the lane
; holding the `slt 42` result for x2) before the freeze and reduction.
240define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
241; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_cmp(
242; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
243; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
244; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], <i32 17, i32 17, i32 17, i32 17, i32 42, i32 42, i32 42, i32 42>
245; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
246; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
247; CHECK-NEXT:    call void @use1(i1 [[TMP5]])
248; CHECK-NEXT:    [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]]
249; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]])
250; CHECK-NEXT:    ret i1 [[TMP7]]
251;
252  %x0 = extractelement <4 x i32> %x, i32 0
253  %x1 = extractelement <4 x i32> %x, i32 1
254  %x2 = extractelement <4 x i32> %x, i32 2
255  %x3 = extractelement <4 x i32> %x, i32 3
256  %c0 = icmp slt i32 %x0, 42
257  %c1 = icmp slt i32 %x1, 42
258  %c2 = icmp slt i32 %x2, 42
259  call void @use1(i1 %c2)
260  %c3 = icmp slt i32 %x3, 42
261  %d0 = icmp sgt i32 %x0, 17
262  %d1 = icmp sgt i32 %x1, 17
263  %d2 = icmp sgt i32 %x2, 17
264  %d3 = icmp sgt i32 %x3, 17
265  %s1 = select i1 %c0, i1 %c1, i1 false
266  %s2 = select i1 %s1, i1 %c2, i1 false
267  %s3 = select i1 %s2, i1 %c3, i1 false
268  %s4 = select i1 %s3, i1 %d0, i1 false
269  %s5 = select i1 %s4, i1 %d1, i1 false
270  %s6 = select i1 %s5, i1 %d2, i1 false
271  %s7 = select i1 %s6, i1 %d3, i1 false
272  ret i1 %s7
273}
274
; Same clamp pattern as @logical_and_icmp_clamp, but an intermediate select of
; the chain (%s2) has an extra use: it is passed to @use1.
; Expected output: the `slt 42` and `sgt 17` compares become two separate
; 4-wide vector compares. The %s1/%s2 part of the chain stays scalar (built
; from extracted `slt` lanes) so it can feed @use1. Only the `sgt` compare is
; frozen and reduced with llvm.vector.reduce.and.v4i1; that result is then
; combined with lane 3 of the `slt` compare and with %s2 via scalar selects.
275define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
276; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
277; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
278; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
279; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
280; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
281; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
282; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
283; CHECK-NEXT:    [[S2:%.*]] = select i1 [[S1]], i1 [[TMP5]], i1 false
284; CHECK-NEXT:    call void @use1(i1 [[S2]])
285; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP2]]
286; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
287; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
288; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP8]], i1 false
289; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[S2]], i1 [[OP_RDX]], i1 false
290; CHECK-NEXT:    ret i1 [[OP_RDX1]]
291;
292  %x0 = extractelement <4 x i32> %x, i32 0
293  %x1 = extractelement <4 x i32> %x, i32 1
294  %x2 = extractelement <4 x i32> %x, i32 2
295  %x3 = extractelement <4 x i32> %x, i32 3
296  %c0 = icmp slt i32 %x0, 42
297  %c1 = icmp slt i32 %x1, 42
298  %c2 = icmp slt i32 %x2, 42
299  %c3 = icmp slt i32 %x3, 42
300  %d0 = icmp sgt i32 %x0, 17
301  %d1 = icmp sgt i32 %x1, 17
302  %d2 = icmp sgt i32 %x2, 17
303  %d3 = icmp sgt i32 %x3, 17
304  %s1 = select i1 %c0, i1 %c1, i1 false
305  %s2 = select i1 %s1, i1 %c2, i1 false
306  call void @use1(i1 %s2)
307  %s3 = select i1 %s2, i1 %c3, i1 false
308  %s4 = select i1 %s3, i1 %d0, i1 false
309  %s5 = select i1 %s4, i1 %d1, i1 false
310  %s6 = select i1 %s5, i1 %d2, i1 false
311  %s7 = select i1 %s6, i1 %d3, i1 false
312  ret i1 %s7
313}
314
; Clamp-style test with <8 x i32> inputs of which only lanes 0-3 are read.
; Each of those lanes of %x is compared `slt 42` and `slt` against the matching
; lane of %y; all eight conditions are chained with logical-and.
; Expected output: one 8-wide `slt` compare. The left operand is lanes 0-3 of
; %x repeated twice; the right operand is <42 x 4> with lanes 0-3 of %y placed
; in its upper half via llvm.vector.insert at index 4. The result is frozen and
; reduced with llvm.vector.reduce.and.v8i1.
315define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
316; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
317; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
318; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
319; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <4 x i32> [[TMP2]], i64 4)
320; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]]
321; CHECK-NEXT:    [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]]
322; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]])
323; CHECK-NEXT:    ret i1 [[TMP6]]
324;
325  %x0 = extractelement <8 x i32> %x, i32 0
326  %x1 = extractelement <8 x i32> %x, i32 1
327  %x2 = extractelement <8 x i32> %x, i32 2
328  %x3 = extractelement <8 x i32> %x, i32 3
329  %y0 = extractelement <8 x i32> %y, i32 0
330  %y1 = extractelement <8 x i32> %y, i32 1
331  %y2 = extractelement <8 x i32> %y, i32 2
332  %y3 = extractelement <8 x i32> %y, i32 3
333  %c0 = icmp slt i32 %x0, 42
334  %c1 = icmp slt i32 %x1, 42
335  %c2 = icmp slt i32 %x2, 42
336  %c3 = icmp slt i32 %x3, 42
337  %d0 = icmp slt i32 %x0, %y0
338  %d1 = icmp slt i32 %x1, %y1
339  %d2 = icmp slt i32 %x2, %y2
340  %d3 = icmp slt i32 %x3, %y3
341  %s1 = select i1 %c0, i1 %c1, i1 false
342  %s2 = select i1 %s1, i1 %c2, i1 false
343  %s3 = select i1 %s2, i1 %c3, i1 false
344  %s4 = select i1 %s3, i1 %d0, i1 false
345  %s5 = select i1 %s4, i1 %d1, i1 false
346  %s6 = select i1 %s5, i1 %d2, i1 false
347  %s7 = select i1 %s6, i1 %d3, i1 false
348  ret i1 %s7
349}
350
; Variant of @logical_and_icmp_clamp with one condition removed: only lanes 0-2
; get the `slt 42` compare (no %c3, and therefore no %s3), while all four lanes
; still get `sgt 17`. The chain has seven conditions instead of eight.
; Expected output: lanes 0-1 of the `slt` compare are done as a <2 x i32>
; vector and lane 2 as a scalar; the four `sgt` compares become a 4-wide vector
; that is frozen and reduced with llvm.vector.reduce.and.v4i1; the pieces are
; then combined by scalar selects, with scalar freezes on two of the select
; conditions.
351define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
352; CHECK-LABEL: @logical_and_icmp_clamp_partial(
353; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
354; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
355; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42)
356; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
357; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
358; CHECK-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
359; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
360; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
361; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP7]], i1 [[TMP6]], i1 false
362; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
363; CHECK-NEXT:    [[TMP9:%.*]] = freeze i1 [[TMP8]]
364; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[TMP9]], i1 [[C2]], i1 false
365; CHECK-NEXT:    [[TMP10:%.*]] = freeze i1 [[OP_RDX]]
366; CHECK-NEXT:    [[OP_RDX2:%.*]] = select i1 [[TMP10]], i1 [[OP_RDX1]], i1 false
367; CHECK-NEXT:    ret i1 [[OP_RDX2]]
368;
369  %x0 = extractelement <4 x i32> %x, i32 0
370  %x1 = extractelement <4 x i32> %x, i32 1
371  %x2 = extractelement <4 x i32> %x, i32 2
372  %x3 = extractelement <4 x i32> %x, i32 3
373  %c0 = icmp slt i32 %x0, 42
374  %c1 = icmp slt i32 %x1, 42
375  %c2 = icmp slt i32 %x2, 42
376  ; %c3 (the lane-3 `slt 42` compare) is omitted relative to @logical_and_icmp_clamp
377  %d0 = icmp sgt i32 %x0, 17
378  %d1 = icmp sgt i32 %x1, 17
379  %d2 = icmp sgt i32 %x2, 17
380  %d3 = icmp sgt i32 %x3, 17
381  %s1 = select i1 %c0, i1 %c1, i1 false
382  %s2 = select i1 %s1, i1 %c2, i1 false
383  ; %s3 (the select that consumed %c3) is omitted relative to @logical_and_icmp_clamp
384  %s4 = select i1 %s2, i1 %d0, i1 false
385  %s5 = select i1 %s4, i1 %d1, i1 false
386  %s6 = select i1 %s5, i1 %d2, i1 false
387  %s7 = select i1 %s6, i1 %d3, i1 false
388  ret i1 %s7
389}
390
; Variant of @logical_and_icmp_clamp where a single compare (%c3) uses `ult`
; instead of `slt`; every other compare is unchanged.
; Expected output: still one 8-wide reduction. The operand vectors are built
; with shuffles and llvm.vector.insert so that the `slt 42` lanes are expressed
; as `42 sgt x` (operands swapped). Both an `sgt` and a `ult` 8-wide compare
; are emitted and blended so that only lane 7 comes from the `ult` compare.
; The blend is frozen and reduced with llvm.vector.reduce.and.v8i1.
391define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
392; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
393; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
394; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 3>
395; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0)
396; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 15>
397; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]]
398; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]]
399; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
400; CHECK-NEXT:    [[TMP8:%.*]] = freeze <8 x i1> [[TMP7]]
401; CHECK-NEXT:    [[TMP9:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP8]])
402; CHECK-NEXT:    ret i1 [[TMP9]]
403;
404  %x0 = extractelement <4 x i32> %x, i32 0
405  %x1 = extractelement <4 x i32> %x, i32 1
406  %x2 = extractelement <4 x i32> %x, i32 2
407  %x3 = extractelement <4 x i32> %x, i32 3
408  %c0 = icmp slt i32 %x0, 42
409  %c1 = icmp slt i32 %x1, 42
410  %c2 = icmp slt i32 %x2, 42
411  %c3 = icmp ult i32 %x3, 42 ; predicate changed
412  %d0 = icmp sgt i32 %x0, 17
413  %d1 = icmp sgt i32 %x1, 17
414  %d2 = icmp sgt i32 %x2, 17
415  %d3 = icmp sgt i32 %x3, 17
416  %s1 = select i1 %c0, i1 %c1, i1 false
417  %s2 = select i1 %s1, i1 %c2, i1 false
418  %s3 = select i1 %s2, i1 %c3, i1 false
419  %s4 = select i1 %s3, i1 %d0, i1 false
420  %s5 = select i1 %s4, i1 %d1, i1 false
421  %s6 = select i1 %s5, i1 %d2, i1 false
422  %s7 = select i1 %s6, i1 %d3, i1 false
423  ret i1 %s7
424}
425
; Logical-and reduction whose chain starts with a non-compare operand: %s3 is
; `select %c, %c, false` on the i1 argument %c, followed by four `slt` compares
; of %x lanes against %y lanes.
; Expected output: the four compares become one vector icmp that is frozen and
; reduced with llvm.vector.reduce.and.v4i1, and the scalar %c is folded in with
; a single trailing `select C, <reduction>, false`.
426define i1 @logical_and_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
427; CHECK-LABEL: @logical_and_icmp_extra_op(
428; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
429; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
430; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
431; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[C:%.*]], i1 [[TMP3]], i1 false
432; CHECK-NEXT:    ret i1 [[OP_RDX]]
433;
434  %x0 = extractelement <4 x i32> %x, i32 0
435  %x1 = extractelement <4 x i32> %x, i32 1
436  %x2 = extractelement <4 x i32> %x, i32 2
437  %x3 = extractelement <4 x i32> %x, i32 3
438  %y0 = extractelement <4 x i32> %y, i32 0
439  %y1 = extractelement <4 x i32> %y, i32 1
440  %y2 = extractelement <4 x i32> %y, i32 2
441  %y3 = extractelement <4 x i32> %y, i32 3
442  %d0 = icmp slt i32 %x0, %y0
443  %d1 = icmp slt i32 %x1, %y1
444  %d2 = icmp slt i32 %x2, %y2
445  %d3 = icmp slt i32 %x3, %y3
446  %s3 = select i1 %c, i1 %c, i1 false
447  %s4 = select i1 %s3, i1 %d0, i1 false
448  %s5 = select i1 %s4, i1 %d1, i1 false
449  %s6 = select i1 %s5, i1 %d2, i1 false
450  %s7 = select i1 %s6, i1 %d3, i1 false
451  ret i1 %s7
452}
453
; Logical-or counterpart of @logical_and_icmp_extra_op: the chain starts with
; `select %c, true, %c` on the i1 argument %c, followed by four `slt` compares
; of %x lanes against %y lanes, all joined with `select a, true, b`.
; Expected output: one vector icmp, a freeze, llvm.vector.reduce.or.v4i1, and a
; single trailing `select C, true, <reduction>` folding in the scalar %c.
454define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
455; CHECK-LABEL: @logical_or_icmp_extra_op(
456; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]]
457; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
458; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
459; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[C:%.*]], i1 true, i1 [[TMP3]]
460; CHECK-NEXT:    ret i1 [[OP_RDX]]
461;
462  %x0 = extractelement <4 x i32> %x, i32 0
463  %x1 = extractelement <4 x i32> %x, i32 1
464  %x2 = extractelement <4 x i32> %x, i32 2
465  %x3 = extractelement <4 x i32> %x, i32 3
466  %y0 = extractelement <4 x i32> %y, i32 0
467  %y1 = extractelement <4 x i32> %y, i32 1
468  %y2 = extractelement <4 x i32> %y, i32 2
469  %y3 = extractelement <4 x i32> %y, i32 3
470  %d0 = icmp slt i32 %x0, %y0
471  %d1 = icmp slt i32 %x1, %y1
472  %d2 = icmp slt i32 %x2, %y2
473  %d3 = icmp slt i32 %x3, %y3
474  %s3 = select i1 %c, i1 true, i1 %c
475  %s4 = select i1 %s3, i1 true, i1 %d0
476  %s5 = select i1 %s4, i1 true, i1 %d1
477  %s6 = select i1 %s5, i1 true, i1 %d2
478  %s7 = select i1 %s6, i1 true, i1 %d3
479  ret i1 %s7
480}
481
; Logical-and chain that interleaves four vectorizable compares (%d0-%d3,
; `sgt 17` on each lane of %x) with three scalar i1 arguments (%c0, %c1, %c2).
; Expected output: the four compares become one vector icmp that is frozen and
; reduced with llvm.vector.reduce.and.v4i1; the three scalar arguments are then
; folded in with scalar selects, with scalar freezes applied to C1 and to the
; first combined result before they are used as select conditions.
482define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
483; CHECK-LABEL: @logical_and_icmp_extra_args(
484; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17)
485; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
486; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
487; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
488; CHECK-NEXT:    [[TMP4:%.*]] = freeze i1 [[C1:%.*]]
489; CHECK-NEXT:    [[OP_RDX1:%.*]] = select i1 [[TMP4]], i1 [[C2:%.*]], i1 false
490; CHECK-NEXT:    [[TMP5:%.*]] = freeze i1 [[OP_RDX]]
491; CHECK-NEXT:    [[OP_RDX2:%.*]] = select i1 [[TMP5]], i1 [[OP_RDX1]], i1 false
492; CHECK-NEXT:    ret i1 [[OP_RDX2]]
493;
494  %x0 = extractelement <4 x i32> %x, i32 0
495  %x1 = extractelement <4 x i32> %x, i32 1
496  %x2 = extractelement <4 x i32> %x, i32 2
497  %x3 = extractelement <4 x i32> %x, i32 3
498  %d0 = icmp sgt i32 %x0, 17
499  %d1 = icmp sgt i32 %x1, 17
500  %d2 = icmp sgt i32 %x2, 17
501  %d3 = icmp sgt i32 %x3, 17
502  %s1 = select i1 %d0, i1 %c0, i1 false ; <- d0, d1, d2, d3 get reduced.
503  %s2 = select i1 %s1, i1 %c1, i1 false ; <- c0, c1, c2 remain scalar.
504  %s3 = select i1 %s2, i1 %c2, i1 false
505  %s5 = select i1 %s3, i1 %d1, i1 false
506  %s6 = select i1 %s5, i1 %d2, i1 false
507  %s7 = select i1 %s6, i1 %d3, i1 false
508  ret i1 %s7
509}
510
511