; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll (revision aca5f9aeea8da8857235347ed1363ccda5460cbb)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512

; // PR42652
; unsigned long bitmask_16xi8(const char *src) {
;     unsigned long mask = 0;
;     for (unsigned i = 0; i != 16; ++i) {
;       if (src[i])
;         mask |= (1ull << i);
;     }
;     return mask;
; }
define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) {
; CHECK-LABEL: @bitmask_16xi8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i8 [[TMP0]], 0
; CHECK-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <8 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> zeroinitializer, <8 x i64> <i64 2, i64 4, i64 8, i64 16, i64 32, i64 64, i64 128, i64 256>
; CHECK-NEXT:    [[ARRAYIDX_9:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[ARRAYIDX_9]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> zeroinitializer, <4 x i64> <i64 512, i64 1024, i64 2048, i64 4096>
; CHECK-NEXT:    [[ARRAYIDX_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 13
; CHECK-NEXT:    [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX_13]], align 1
; CHECK-NEXT:    [[TOBOOL_NOT_13:%.*]] = icmp eq i8 [[TMP7]], 0
; CHECK-NEXT:    [[OR_13:%.*]] = select i1 [[TOBOOL_NOT_13]], i64 0, i64 8192
; CHECK-NEXT:    [[ARRAYIDX_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 14
; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX_14]], align 1
; CHECK-NEXT:    [[TOBOOL_NOT_14:%.*]] = icmp eq i8 [[TMP8]], 0
; CHECK-NEXT:    [[OR_14:%.*]] = select i1 [[TOBOOL_NOT_14]], i64 0, i64 16384
; CHECK-NEXT:    [[ARRAYIDX_15:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 15
; CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1
; CHECK-NEXT:    [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0
; CHECK-NEXT:    [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768
; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP3]])
; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP6]])
; CHECK-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP10]], [[TMP11]]
; CHECK-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OP_RDX]], [[OR_13]]
; CHECK-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]]
; CHECK-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX1]], [[OP_RDX2]]
; CHECK-NEXT:    [[OP_RDX4:%.*]] = or i64 [[OP_RDX3]], [[OR]]
; CHECK-NEXT:    ret i64 [[OP_RDX4]]
;
entry:
  %0 = load i8, ptr %src, align 1
  %tobool.not = icmp ne i8 %0, 0
  %or = zext i1 %tobool.not to i64
  %arrayidx.1 = getelementptr inbounds i8, ptr %src, i64 1
  %1 = load i8, ptr %arrayidx.1, align 1
  %tobool.not.1 = icmp eq i8 %1, 0
  %or.1 = select i1 %tobool.not.1, i64 0, i64 2
  %mask.1.1 = or i64 %or.1, %or
  %arrayidx.2 = getelementptr inbounds i8, ptr %src, i64 2
  %2 = load i8, ptr %arrayidx.2, align 1
  %tobool.not.2 = icmp eq i8 %2, 0
  %or.2 = select i1 %tobool.not.2, i64 0, i64 4
  %mask.1.2 = or i64 %or.2, %mask.1.1
  %arrayidx.3 = getelementptr inbounds i8, ptr %src, i64 3
  %3 = load i8, ptr %arrayidx.3, align 1
  %tobool.not.3 = icmp eq i8 %3, 0
  %or.3 = select i1 %tobool.not.3, i64 0, i64 8
  %mask.1.3 = or i64 %or.3, %mask.1.2
  %arrayidx.4 = getelementptr inbounds i8, ptr %src, i64 4
  %4 = load i8, ptr %arrayidx.4, align 1
  %tobool.not.4 = icmp eq i8 %4, 0
  %or.4 = select i1 %tobool.not.4, i64 0, i64 16
  %mask.1.4 = or i64 %or.4, %mask.1.3
  %arrayidx.5 = getelementptr inbounds i8, ptr %src, i64 5
  %5 = load i8, ptr %arrayidx.5, align 1
  %tobool.not.5 = icmp eq i8 %5, 0
  %or.5 = select i1 %tobool.not.5, i64 0, i64 32
  %mask.1.5 = or i64 %or.5, %mask.1.4
  %arrayidx.6 = getelementptr inbounds i8, ptr %src, i64 6
  %6 = load i8, ptr %arrayidx.6, align 1
  %tobool.not.6 = icmp eq i8 %6, 0
  %or.6 = select i1 %tobool.not.6, i64 0, i64 64
  %mask.1.6 = or i64 %or.6, %mask.1.5
  %arrayidx.7 = getelementptr inbounds i8, ptr %src, i64 7
  %7 = load i8, ptr %arrayidx.7, align 1
  %tobool.not.7 = icmp eq i8 %7, 0
  %or.7 = select i1 %tobool.not.7, i64 0, i64 128
  %mask.1.7 = or i64 %or.7, %mask.1.6
  %arrayidx.8 = getelementptr inbounds i8, ptr %src, i64 8
  %8 = load i8, ptr %arrayidx.8, align 1
  %tobool.not.8 = icmp eq i8 %8, 0
  %or.8 = select i1 %tobool.not.8, i64 0, i64 256
  %mask.1.8 = or i64 %or.8, %mask.1.7
  %arrayidx.9 = getelementptr inbounds i8, ptr %src, i64 9
  %9 = load i8, ptr %arrayidx.9, align 1
  %tobool.not.9 = icmp eq i8 %9, 0
  %or.9 = select i1 %tobool.not.9, i64 0, i64 512
  %mask.1.9 = or i64 %or.9, %mask.1.8
  %arrayidx.10 = getelementptr inbounds i8, ptr %src, i64 10
  %10 = load i8, ptr %arrayidx.10, align 1
  %tobool.not.10 = icmp eq i8 %10, 0
  %or.10 = select i1 %tobool.not.10, i64 0, i64 1024
  %mask.1.10 = or i64 %or.10, %mask.1.9
  %arrayidx.11 = getelementptr inbounds i8, ptr %src, i64 11
  %11 = load i8, ptr %arrayidx.11, align 1
  %tobool.not.11 = icmp eq i8 %11, 0
  %or.11 = select i1 %tobool.not.11, i64 0, i64 2048
  %mask.1.11 = or i64 %or.11, %mask.1.10
  %arrayidx.12 = getelementptr inbounds i8, ptr %src, i64 12
  %12 = load i8, ptr %arrayidx.12, align 1
  %tobool.not.12 = icmp eq i8 %12, 0
  %or.12 = select i1 %tobool.not.12, i64 0, i64 4096
  %mask.1.12 = or i64 %or.12, %mask.1.11
  %arrayidx.13 = getelementptr inbounds i8, ptr %src, i64 13
  %13 = load i8, ptr %arrayidx.13, align 1
  %tobool.not.13 = icmp eq i8 %13, 0
  %or.13 = select i1 %tobool.not.13, i64 0, i64 8192
  %mask.1.13 = or i64 %or.13, %mask.1.12
  %arrayidx.14 = getelementptr inbounds i8, ptr %src, i64 14
  %14 = load i8, ptr %arrayidx.14, align 1
  %tobool.not.14 = icmp eq i8 %14, 0
  %or.14 = select i1 %tobool.not.14, i64 0, i64 16384
  %mask.1.14 = or i64 %or.14, %mask.1.13
  %arrayidx.15 = getelementptr inbounds i8, ptr %src, i64 15
  %15 = load i8, ptr %arrayidx.15, align 1
  %tobool.not.15 = icmp eq i8 %15, 0
  %or.15 = select i1 %tobool.not.15, i64 0, i64 32768
  %mask.1.15 = or i64 %or.15, %mask.1.14
  ret i64 %mask.1.15
}

define i64 @bitmask_4xi16(ptr nocapture noundef readonly %src) {
; SSE-LABEL: @bitmask_4xi16(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
; SSE-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
; SSE-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; SSE-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
; SSE-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
; SSE-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; SSE-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
; SSE-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
; SSE-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
; SSE-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; SSE-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
; SSE-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
; SSE-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i16 [[TMP5]], 0
; SSE-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; SSE-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 7
; SSE-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
; SSE-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i16 [[TMP6]], 0
; SSE-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; SSE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; SSE-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; SSE-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; SSE-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; SSE-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; SSE-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX-LABEL: @bitmask_4xi16(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
; AVX-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
; AVX-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
; AVX-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
; AVX-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
; AVX-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
; AVX-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
; AVX-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
; AVX-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_6]], align 2
; AVX-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i16 [[TMP5]], 0
; AVX-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; AVX-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 7
; AVX-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_7]], align 2
; AVX-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i16 [[TMP6]], 0
; AVX-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; AVX-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; AVX-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; AVX-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX512-LABEL: @bitmask_4xi16(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = load i16, ptr [[SRC:%.*]], align 2
; AVX512-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i16 [[TMP0]], 0
; AVX512-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX512-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 1
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[ARRAYIDX_1]], align 2
; AVX512-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i16> [[TMP1]], zeroinitializer
; AVX512-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX512-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 5
; AVX512-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_5]], align 2
; AVX512-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i16 [[TMP4]], 0
; AVX512-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX512-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 6
; AVX512-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr [[ARRAYIDX_6]], align 2
; AVX512-NEXT:    [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP5]], zeroinitializer
; AVX512-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
; AVX512-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX512-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
; AVX512-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
; AVX512-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
; AVX512-NEXT:    [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
; AVX512-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX512-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX512-NEXT:    ret i64 [[OP_RDX3]]
;
entry:
  %0 = load i16, ptr %src, align 2
  %tobool.not = icmp ne i16 %0, 0
  %or = zext i1 %tobool.not to i64
  %arrayidx.1 = getelementptr inbounds i16, ptr %src, i64 1
  %1 = load i16, ptr %arrayidx.1, align 2
  %tobool.not.1 = icmp eq i16 %1, 0
  %or.1 = select i1 %tobool.not.1, i64 0, i64 2
  %mask.1.1 = or i64 %or.1, %or
  %arrayidx.2 = getelementptr inbounds i16, ptr %src, i64 2
  %2 = load i16, ptr %arrayidx.2, align 2
  %tobool.not.2 = icmp eq i16 %2, 0
  %or.2 = select i1 %tobool.not.2, i64 0, i64 4
  %mask.1.2 = or i64 %or.2, %mask.1.1
  %arrayidx.3 = getelementptr inbounds i16, ptr %src, i64 3
  %3 = load i16, ptr %arrayidx.3, align 2
  %tobool.not.3 = icmp eq i16 %3, 0
  %or.3 = select i1 %tobool.not.3, i64 0, i64 8
  %mask.1.3 = or i64 %or.3, %mask.1.2
  %arrayidx.4 = getelementptr inbounds i16, ptr %src, i64 4
  %4 = load i16, ptr %arrayidx.4, align 2
  %tobool.not.4 = icmp eq i16 %4, 0
  %or.4 = select i1 %tobool.not.4, i64 0, i64 16
  %mask.1.4 = or i64 %or.4, %mask.1.3
  %arrayidx.5 = getelementptr inbounds i16, ptr %src, i64 5
  %5 = load i16, ptr %arrayidx.5, align 2
  %tobool.not.5 = icmp eq i16 %5, 0
  %or.5 = select i1 %tobool.not.5, i64 0, i64 32
  %mask.1.5 = or i64 %or.5, %mask.1.4
  %arrayidx.6 = getelementptr inbounds i16, ptr %src, i64 6
  %6 = load i16, ptr %arrayidx.6, align 2
  %tobool.not.6 = icmp eq i16 %6, 0
  %or.6 = select i1 %tobool.not.6, i64 0, i64 64
  %mask.1.6 = or i64 %or.6, %mask.1.5
  %arrayidx.7 = getelementptr inbounds i16, ptr %src, i64 7
  %7 = load i16, ptr %arrayidx.7, align 2
  %tobool.not.7 = icmp eq i16 %7, 0
  %or.7 = select i1 %tobool.not.7, i64 0, i64 128
  %mask.1.7 = or i64 %or.7, %mask.1.6
  ret i64 %mask.1.7
}

define i64 @bitmask_8xi32(ptr nocapture noundef readonly %src) {
; SSE-LABEL: @bitmask_8xi32(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; SSE-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
; SSE-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; SSE-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
; SSE-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
; SSE-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; SSE-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
; SSE-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; SSE-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
; SSE-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; SSE-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
; SSE-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
; SSE-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i32 [[TMP5]], 0
; SSE-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; SSE-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 7
; SSE-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
; SSE-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i32 [[TMP6]], 0
; SSE-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; SSE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; SSE-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; SSE-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; SSE-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; SSE-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; SSE-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX-LABEL: @bitmask_8xi32(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; AVX-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
; AVX-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
; AVX-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
; AVX-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
; AVX-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; AVX-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
; AVX-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
; AVX-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
; AVX-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i32 [[TMP5]], 0
; AVX-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; AVX-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 7
; AVX-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
; AVX-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i32 [[TMP6]], 0
; AVX-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; AVX-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; AVX-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; AVX-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX512-LABEL: @bitmask_8xi32(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
; AVX512-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i32 [[TMP0]], 0
; AVX512-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX512-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 1
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_1]], align 4
; AVX512-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
; AVX512-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX512-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 5
; AVX512-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
; AVX512-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i32 [[TMP4]], 0
; AVX512-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX512-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 6
; AVX512-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX_6]], align 4
; AVX512-NEXT:    [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP5]], zeroinitializer
; AVX512-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
; AVX512-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX512-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
; AVX512-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
; AVX512-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
; AVX512-NEXT:    [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
; AVX512-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX512-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX512-NEXT:    ret i64 [[OP_RDX3]]
;
entry:
  %0 = load i32, ptr %src, align 4
  %tobool.not = icmp ne i32 %0, 0
  %or = zext i1 %tobool.not to i64
  %arrayidx.1 = getelementptr inbounds i32, ptr %src, i64 1
  %1 = load i32, ptr %arrayidx.1, align 4
  %tobool.not.1 = icmp eq i32 %1, 0
  %or.1 = select i1 %tobool.not.1, i64 0, i64 2
  %mask.1.1 = or i64 %or.1, %or
  %arrayidx.2 = getelementptr inbounds i32, ptr %src, i64 2
  %2 = load i32, ptr %arrayidx.2, align 4
  %tobool.not.2 = icmp eq i32 %2, 0
  %or.2 = select i1 %tobool.not.2, i64 0, i64 4
  %mask.1.2 = or i64 %or.2, %mask.1.1
  %arrayidx.3 = getelementptr inbounds i32, ptr %src, i64 3
  %3 = load i32, ptr %arrayidx.3, align 4
  %tobool.not.3 = icmp eq i32 %3, 0
  %or.3 = select i1 %tobool.not.3, i64 0, i64 8
  %mask.1.3 = or i64 %or.3, %mask.1.2
  %arrayidx.4 = getelementptr inbounds i32, ptr %src, i64 4
  %4 = load i32, ptr %arrayidx.4, align 4
  %tobool.not.4 = icmp eq i32 %4, 0
  %or.4 = select i1 %tobool.not.4, i64 0, i64 16
  %mask.1.4 = or i64 %or.4, %mask.1.3
  %arrayidx.5 = getelementptr inbounds i32, ptr %src, i64 5
  %5 = load i32, ptr %arrayidx.5, align 4
  %tobool.not.5 = icmp eq i32 %5, 0
  %or.5 = select i1 %tobool.not.5, i64 0, i64 32
  %mask.1.5 = or i64 %or.5, %mask.1.4
  %arrayidx.6 = getelementptr inbounds i32, ptr %src, i64 6
  %6 = load i32, ptr %arrayidx.6, align 4
  %tobool.not.6 = icmp eq i32 %6, 0
  %or.6 = select i1 %tobool.not.6, i64 0, i64 64
  %mask.1.6 = or i64 %or.6, %mask.1.5
  %arrayidx.7 = getelementptr inbounds i32, ptr %src, i64 7
  %7 = load i32, ptr %arrayidx.7, align 4
  %tobool.not.7 = icmp eq i32 %7, 0
  %or.7 = select i1 %tobool.not.7, i64 0, i64 128
  %mask.1.7 = or i64 %or.7, %mask.1.6
  ret i64 %mask.1.7
}

define i64 @bitmask_8xi64(ptr nocapture noundef readonly %src) {
; SSE2-LABEL: @bitmask_8xi64(
; SSE2-NEXT:  entry:
; SSE2-NEXT:    [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
; SSE2-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; SSE2-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
; SSE2-NEXT:    [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_1]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_1:%.*]] = icmp eq i64 [[TMP1]], 0
; SSE2-NEXT:    [[OR_1:%.*]] = select i1 [[TOBOOL_NOT_1]], i64 0, i64 2
; SSE2-NEXT:    [[MASK_1_1:%.*]] = or i64 [[OR_1]], [[OR]]
; SSE2-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 2
; SSE2-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX_2]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_2:%.*]] = icmp eq i64 [[TMP2]], 0
; SSE2-NEXT:    [[OR_2:%.*]] = select i1 [[TOBOOL_NOT_2]], i64 0, i64 4
; SSE2-NEXT:    [[MASK_1_2:%.*]] = or i64 [[OR_2]], [[MASK_1_1]]
; SSE2-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 3
; SSE2-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX_3]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_3:%.*]] = icmp eq i64 [[TMP3]], 0
; SSE2-NEXT:    [[OR_3:%.*]] = select i1 [[TOBOOL_NOT_3]], i64 0, i64 8
; SSE2-NEXT:    [[MASK_1_3:%.*]] = or i64 [[OR_3]], [[MASK_1_2]]
; SSE2-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 4
; SSE2-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_4]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_4:%.*]] = icmp eq i64 [[TMP4]], 0
; SSE2-NEXT:    [[OR_4:%.*]] = select i1 [[TOBOOL_NOT_4]], i64 0, i64 16
; SSE2-NEXT:    [[MASK_1_4:%.*]] = or i64 [[OR_4]], [[MASK_1_3]]
; SSE2-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
; SSE2-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP5]], 0
; SSE2-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; SSE2-NEXT:    [[MASK_1_5:%.*]] = or i64 [[OR_5]], [[MASK_1_4]]
; SSE2-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
; SSE2-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP6]], 0
; SSE2-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; SSE2-NEXT:    [[MASK_1_6:%.*]] = or i64 [[OR_6]], [[MASK_1_5]]
; SSE2-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
; SSE2-NEXT:    [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
; SSE2-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP7]], 0
; SSE2-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; SSE2-NEXT:    [[MASK_1_7:%.*]] = or i64 [[OR_7]], [[MASK_1_6]]
; SSE2-NEXT:    ret i64 [[MASK_1_7]]
;
; SSE4-LABEL: @bitmask_8xi64(
; SSE4-NEXT:  entry:
; SSE4-NEXT:    [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
; SSE4-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
; SSE4-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; SSE4-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
; SSE4-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
; SSE4-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
; SSE4-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; SSE4-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
; SSE4-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
; SSE4-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
; SSE4-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; SSE4-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
; SSE4-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
; SSE4-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP5]], 0
; SSE4-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; SSE4-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
; SSE4-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
; SSE4-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP6]], 0
; SSE4-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; SSE4-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; SSE4-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; SSE4-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; SSE4-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; SSE4-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; SSE4-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX-LABEL: @bitmask_8xi64(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
; AVX-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
; AVX-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
; AVX-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
; AVX-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
; AVX-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
; AVX-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
; AVX-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
; AVX-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX_6]], align 8
; AVX-NEXT:    [[TOBOOL_NOT_6:%.*]] = icmp eq i64 [[TMP5]], 0
; AVX-NEXT:    [[OR_6:%.*]] = select i1 [[TOBOOL_NOT_6]], i64 0, i64 64
; AVX-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 7
; AVX-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX_7]], align 8
; AVX-NEXT:    [[TOBOOL_NOT_7:%.*]] = icmp eq i64 [[TMP6]], 0
; AVX-NEXT:    [[OR_7:%.*]] = select i1 [[TOBOOL_NOT_7]], i64 0, i64 128
; AVX-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP7]], [[OR_5]]
; AVX-NEXT:    [[OP_RDX1:%.*]] = or i64 [[OR_6]], [[OR_7]]
; AVX-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX-NEXT:    ret i64 [[OP_RDX3]]
;
; AVX512-LABEL: @bitmask_8xi64(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = load i64, ptr [[SRC:%.*]], align 8
; AVX512-NEXT:    [[TOBOOL_NOT:%.*]] = icmp ne i64 [[TMP0]], 0
; AVX512-NEXT:    [[OR:%.*]] = zext i1 [[TOBOOL_NOT]] to i64
; AVX512-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 1
; AVX512-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[ARRAYIDX_1]], align 8
; AVX512-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i64> [[TMP1]], zeroinitializer
; AVX512-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> zeroinitializer, <4 x i64> <i64 2, i64 4, i64 8, i64 16>
; AVX512-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 5
; AVX512-NEXT:    [[TMP4:%.*]] = load i64, ptr [[ARRAYIDX_5]], align 8
; AVX512-NEXT:    [[TOBOOL_NOT_5:%.*]] = icmp eq i64 [[TMP4]], 0
; AVX512-NEXT:    [[OR_5:%.*]] = select i1 [[TOBOOL_NOT_5]], i64 0, i64 32
; AVX512-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 6
; AVX512-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_6]], align 8
; AVX512-NEXT:    [[TMP6:%.*]] = icmp eq <2 x i64> [[TMP5]], zeroinitializer
; AVX512-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> zeroinitializer, <2 x i64> <i64 64, i64 128>
; AVX512-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> [[TMP3]])
; AVX512-NEXT:    [[OP_RDX:%.*]] = or i64 [[TMP8]], [[OR_5]]
; AVX512-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
; AVX512-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
; AVX512-NEXT:    [[OP_RDX1:%.*]] = or i64 [[TMP9]], [[TMP10]]
; AVX512-NEXT:    [[OP_RDX2:%.*]] = or i64 [[OP_RDX]], [[OP_RDX1]]
; AVX512-NEXT:    [[OP_RDX3:%.*]] = or i64 [[OP_RDX2]], [[OR]]
; AVX512-NEXT:    ret i64 [[OP_RDX3]]
;
entry:
  %0 = load i64, ptr %src, align 8
  %tobool.not = icmp ne i64 %0, 0
  %or = zext i1 %tobool.not to i64
  %arrayidx.1 = getelementptr inbounds i64, ptr %src, i64 1
  %1 = load i64, ptr %arrayidx.1, align 8
  %tobool.not.1 = icmp eq i64 %1, 0
  %or.1 = select i1 %tobool.not.1, i64 0, i64 2
  %mask.1.1 = or i64 %or.1, %or
  %arrayidx.2 = getelementptr inbounds i64, ptr %src, i64 2
  %2 = load i64, ptr %arrayidx.2, align 8
  %tobool.not.2 = icmp eq i64 %2, 0
  %or.2 = select i1 %tobool.not.2, i64 0, i64 4
  %mask.1.2 = or i64 %or.2, %mask.1.1
  %arrayidx.3 = getelementptr inbounds i64, ptr %src, i64 3
  %3 = load i64, ptr %arrayidx.3, align 8
  %tobool.not.3 = icmp eq i64 %3, 0
  %or.3 = select i1 %tobool.not.3, i64 0, i64 8
  %mask.1.3 = or i64 %or.3, %mask.1.2
  %arrayidx.4 = getelementptr inbounds i64, ptr %src, i64 4
  %4 = load i64, ptr %arrayidx.4, align 8
  %tobool.not.4 = icmp eq i64 %4, 0
  %or.4 = select i1 %tobool.not.4, i64 0, i64 16
  %mask.1.4 = or i64 %or.4, %mask.1.3
  %arrayidx.5 = getelementptr inbounds i64, ptr %src, i64 5
  %5 = load i64, ptr %arrayidx.5, align 8
  %tobool.not.5 = icmp eq i64 %5, 0
  %or.5 = select i1 %tobool.not.5, i64 0, i64 32
  %mask.1.5 = or i64 %or.5, %mask.1.4
  %arrayidx.6 = getelementptr inbounds i64, ptr %src, i64 6
  %6 = load i64, ptr %arrayidx.6, align 8
  %tobool.not.6 = icmp eq i64 %6, 0
  %or.6 = select i1 %tobool.not.6, i64 0, i64 64
  %mask.1.6 = or i64 %or.6, %mask.1.5
  %arrayidx.7 = getelementptr inbounds i64, ptr %src, i64 7
  %7 = load i64, ptr %arrayidx.7, align 8
  %tobool.not.7 = icmp eq i64 %7, 0
  %or.7 = select i1 %tobool.not.7, i64 0, i64 128
  %mask.1.7 = or i64 %or.7, %mask.1.6
  ret i64 %mask.1.7
}