xref: /llvm-project/llvm/test/CodeGen/WebAssembly/simd-bitmask.ll (revision 18077e9fd688443ca111111541e7e3a71236efd5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mattr=+simd128 | FileCheck %s
3
4;; Test that the SIMD bitmask instructions can be selected
5
;; Every function in this file is compiled for 32-bit WebAssembly.
6target triple = "wasm32-unknown-unknown"
7
;; Compares each of the 16 i8 lanes of %v with zero and bitcasts the
;; resulting <16 x i1> to an i16. The expected output is a single i8x16.eq
;; against an all-zero constant followed by i8x16.bitmask.
8define i16 @bitmask_v16i8(<16 x i8> %v) {
9; CHECK-LABEL: bitmask_v16i8:
10; CHECK:         .functype bitmask_v16i8 (v128) -> (i32)
11; CHECK-NEXT:  # %bb.0:
12; CHECK-NEXT:    local.get 0
13; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
14; CHECK-NEXT:    i8x16.eq
15; CHECK-NEXT:    i8x16.bitmask
16; CHECK-NEXT:    # fallthrough-return
17  %cmp = icmp eq <16 x i8> %v, zeroinitializer
18  %bitmask = bitcast <16 x i1> %cmp to i16
19  ret i16 %bitmask
20}
21
;; Compares each of the 8 i16 lanes of %v with zero and bitcasts the
;; resulting <8 x i1> to an i8. The expected output is i16x8.eq followed
;; by i16x8.bitmask.
22define i8 @bitmask_v8i16(<8 x i16> %v) {
23; CHECK-LABEL: bitmask_v8i16:
24; CHECK:         .functype bitmask_v8i16 (v128) -> (i32)
25; CHECK-NEXT:  # %bb.0:
26; CHECK-NEXT:    local.get 0
27; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0
28; CHECK-NEXT:    i16x8.eq
29; CHECK-NEXT:    i16x8.bitmask
30; CHECK-NEXT:    # fallthrough-return
31  %cmp = icmp eq <8 x i16> %v, zeroinitializer
32  %bitmask = bitcast <8 x i1> %cmp to i8
33  ret i8 %bitmask
34}
35
;; Compares each of the 4 i32 lanes of %v with zero, bitcasts the resulting
;; <4 x i1> to an i4, and zero-extends that to the i8 return type. The
;; expected output is i32x4.eq followed by i32x4.bitmask, with no separate
;; instruction for the zero-extension.
36define i8 @bitmask_v4i32(<4 x i32> %v) {
37; CHECK-LABEL: bitmask_v4i32:
38; CHECK:         .functype bitmask_v4i32 (v128) -> (i32)
39; CHECK-NEXT:  # %bb.0:
40; CHECK-NEXT:    local.get 0
41; CHECK-NEXT:    v128.const 0, 0, 0, 0
42; CHECK-NEXT:    i32x4.eq
43; CHECK-NEXT:    i32x4.bitmask
44; CHECK-NEXT:    # fallthrough-return
45  %cmp = icmp eq <4 x i32> %v, zeroinitializer
46  %bitmask = bitcast <4 x i1> %cmp to i4
47  %ext = zext i4 %bitmask to i8
48  ret i8 %ext
49}
50
;; Compares both i64 lanes of %v with zero, bitcasts the resulting <2 x i1>
;; to an i2, and zero-extends that to the i8 return type. The expected
;; output is i64x2.eq followed by i64x2.bitmask, with no separate
;; instruction for the zero-extension.
51define i8 @bitmask_v2i64(<2 x i64> %v) {
52; CHECK-LABEL: bitmask_v2i64:
53; CHECK:         .functype bitmask_v2i64 (v128) -> (i32)
54; CHECK-NEXT:  # %bb.0:
55; CHECK-NEXT:    local.get 0
56; CHECK-NEXT:    v128.const 0, 0
57; CHECK-NEXT:    i64x2.eq
58; CHECK-NEXT:    i64x2.bitmask
59; CHECK-NEXT:    # fallthrough-return
60  %cmp = icmp eq <2 x i64> %v, zeroinitializer
61  %bitmask = bitcast <2 x i1> %cmp to i2
62  %ext = zext i2 %bitmask to i8
63  ret i8 %ext
64}
65
66;; Test unusual vectors
67
;; Single-lane case: compares the one i8 lane of %v with zero and bitcasts
;; the <1 x i1> result to i1. The expected output contains no bitmask
;; instruction; lane 0 is extracted and tested with i32.eqz.
68define i1 @bitmask_v1i8(<1 x i8> %v) {
69; CHECK-LABEL: bitmask_v1i8:
70; CHECK:         .functype bitmask_v1i8 (v128) -> (i32)
71; CHECK-NEXT:  # %bb.0:
72; CHECK-NEXT:    local.get 0
73; CHECK-NEXT:    i8x16.extract_lane_u 0
74; CHECK-NEXT:    i32.eqz
75; CHECK-NEXT:    # fallthrough-return
76  %cmp = icmp eq <1 x i8> %v, zeroinitializer
77  %bitmask = bitcast <1 x i1> %cmp to i1
78  ret i1 %bitmask
79}
80
;; Non-power-of-two lane count: compares the 7 i8 lanes of %v with zero and
;; bitcasts the <7 x i1> result to i7. Per the expected .functype the seven
;; lanes arrive as separate i32 arguments; the expected output rebuilds a
;; vector with i8x16.splat / i8x16.replace_lane, compares it with i8x16.eq,
;; and then assembles the result one lane at a time with extract_lane, and,
;; shl and or, finishing with a mask of 127. No bitmask instruction appears
;; in the expected output for this function.
81define i7 @bitmask_v7i8(<7 x i8> %v) {
82; CHECK-LABEL: bitmask_v7i8:
83; CHECK:         .functype bitmask_v7i8 (i32, i32, i32, i32, i32, i32, i32) -> (i32)
84; CHECK-NEXT:    .local v128
85; CHECK-NEXT:  # %bb.0:
86; CHECK-NEXT:    global.get __stack_pointer
87; CHECK-NEXT:    i32.const 16
88; CHECK-NEXT:    i32.sub
89; CHECK-NEXT:    drop
90; CHECK-NEXT:    local.get 0
91; CHECK-NEXT:    i8x16.splat
92; CHECK-NEXT:    local.get 1
93; CHECK-NEXT:    i8x16.replace_lane 1
94; CHECK-NEXT:    local.get 2
95; CHECK-NEXT:    i8x16.replace_lane 2
96; CHECK-NEXT:    local.get 3
97; CHECK-NEXT:    i8x16.replace_lane 3
98; CHECK-NEXT:    local.get 4
99; CHECK-NEXT:    i8x16.replace_lane 4
100; CHECK-NEXT:    local.get 5
101; CHECK-NEXT:    i8x16.replace_lane 5
102; CHECK-NEXT:    local.get 6
103; CHECK-NEXT:    i8x16.replace_lane 6
104; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
105; CHECK-NEXT:    i8x16.eq
106; CHECK-NEXT:    local.tee 7
107; CHECK-NEXT:    i16x8.extract_lane_u 0
108; CHECK-NEXT:    i32.const 1
109; CHECK-NEXT:    i32.and
110; CHECK-NEXT:    local.get 7
111; CHECK-NEXT:    i16x8.extend_low_i8x16_s
112; CHECK-NEXT:    local.tee 7
113; CHECK-NEXT:    i16x8.extract_lane_u 1
114; CHECK-NEXT:    i32.const 1
115; CHECK-NEXT:    i32.and
116; CHECK-NEXT:    i32.const 1
117; CHECK-NEXT:    i32.shl
118; CHECK-NEXT:    i32.or
119; CHECK-NEXT:    local.get 7
120; CHECK-NEXT:    i16x8.extract_lane_u 2
121; CHECK-NEXT:    i32.const 1
122; CHECK-NEXT:    i32.and
123; CHECK-NEXT:    i32.const 2
124; CHECK-NEXT:    i32.shl
125; CHECK-NEXT:    i32.or
126; CHECK-NEXT:    local.get 7
127; CHECK-NEXT:    i16x8.extract_lane_u 3
128; CHECK-NEXT:    i32.const 1
129; CHECK-NEXT:    i32.and
130; CHECK-NEXT:    i32.const 3
131; CHECK-NEXT:    i32.shl
132; CHECK-NEXT:    i32.or
133; CHECK-NEXT:    local.get 7
134; CHECK-NEXT:    i16x8.extract_lane_u 4
135; CHECK-NEXT:    i32.const 1
136; CHECK-NEXT:    i32.and
137; CHECK-NEXT:    i32.const 4
138; CHECK-NEXT:    i32.shl
139; CHECK-NEXT:    i32.or
140; CHECK-NEXT:    local.get 7
141; CHECK-NEXT:    i16x8.extract_lane_u 5
142; CHECK-NEXT:    i32.const 1
143; CHECK-NEXT:    i32.and
144; CHECK-NEXT:    i32.const 5
145; CHECK-NEXT:    i32.shl
146; CHECK-NEXT:    i32.or
147; CHECK-NEXT:    local.get 7
148; CHECK-NEXT:    i16x8.extract_lane_u 6
149; CHECK-NEXT:    i32.const 6
150; CHECK-NEXT:    i32.shl
151; CHECK-NEXT:    i32.or
152; CHECK-NEXT:    i32.const 127
153; CHECK-NEXT:    i32.and
154; CHECK-NEXT:    # fallthrough-return
155  %cmp = icmp eq <7 x i8> %v, zeroinitializer
156  %bitmask = bitcast <7 x i1> %cmp to i7
157  ret i7 %bitmask
158}
159
;; Half-width vector: compares the 8 i8 lanes of %v with zero and bitcasts
;; the <8 x i1> result to i8. The expected output performs the comparison
;; with i8x16.eq, widens the low lanes with i16x8.extend_low_i8x16_s, and
;; then applies i16x8.bitmask.
160define i8 @bitmask_v8i8(<8 x i8> %v) {
161; CHECK-LABEL: bitmask_v8i8:
162; CHECK:         .functype bitmask_v8i8 (v128) -> (i32)
163; CHECK-NEXT:  # %bb.0:
164; CHECK-NEXT:    local.get 0
165; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
166; CHECK-NEXT:    i8x16.eq
167; CHECK-NEXT:    i16x8.extend_low_i8x16_s
168; CHECK-NEXT:    i16x8.bitmask
169; CHECK-NEXT:    # fallthrough-return
170  %cmp = icmp eq <8 x i8> %v, zeroinitializer
171  %bitmask = bitcast <8 x i1> %cmp to i8
172  ret i8 %bitmask
173}
174
;; Double-width vector: compares the 32 i8 lanes of %v with zero and
;; bitcasts the <32 x i1> result to i32. Per the expected .functype the
;; argument arrives as two v128 values. The expected output compares each
;; half with i8x16.eq and then assembles the i32 result one lane at a time
;; (i8x16.extract_lane_u, and, shl, or): the first half fills bits 0-15 and
;; the second half is shifted into bits 16-31. No bitmask instruction
;; appears in the expected output for this function.
;; TODO(review): presumably each half could be lowered with i8x16.bitmask;
;; the assertions below only record what llc produced - confirm intent.
175define i32 @bitmask_v32i8(<32 x i8> %v) {
176; CHECK-LABEL: bitmask_v32i8:
177; CHECK:         .functype bitmask_v32i8 (v128, v128) -> (i32)
178; CHECK-NEXT:    .local v128
179; CHECK-NEXT:  # %bb.0:
180; CHECK-NEXT:    global.get __stack_pointer
181; CHECK-NEXT:    i32.const 16
182; CHECK-NEXT:    i32.sub
183; CHECK-NEXT:    drop
184; CHECK-NEXT:    local.get 0
185; CHECK-NEXT:    v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
186; CHECK-NEXT:    local.tee 2
187; CHECK-NEXT:    i8x16.eq
188; CHECK-NEXT:    local.tee 0
189; CHECK-NEXT:    i8x16.extract_lane_u 0
190; CHECK-NEXT:    i32.const 1
191; CHECK-NEXT:    i32.and
192; CHECK-NEXT:    local.get 0
193; CHECK-NEXT:    i8x16.extract_lane_u 1
194; CHECK-NEXT:    i32.const 1
195; CHECK-NEXT:    i32.and
196; CHECK-NEXT:    i32.const 1
197; CHECK-NEXT:    i32.shl
198; CHECK-NEXT:    i32.or
199; CHECK-NEXT:    local.get 0
200; CHECK-NEXT:    i8x16.extract_lane_u 2
201; CHECK-NEXT:    i32.const 1
202; CHECK-NEXT:    i32.and
203; CHECK-NEXT:    i32.const 2
204; CHECK-NEXT:    i32.shl
205; CHECK-NEXT:    i32.or
206; CHECK-NEXT:    local.get 0
207; CHECK-NEXT:    i8x16.extract_lane_u 3
208; CHECK-NEXT:    i32.const 1
209; CHECK-NEXT:    i32.and
210; CHECK-NEXT:    i32.const 3
211; CHECK-NEXT:    i32.shl
212; CHECK-NEXT:    i32.or
213; CHECK-NEXT:    local.get 0
214; CHECK-NEXT:    i8x16.extract_lane_u 4
215; CHECK-NEXT:    i32.const 1
216; CHECK-NEXT:    i32.and
217; CHECK-NEXT:    i32.const 4
218; CHECK-NEXT:    i32.shl
219; CHECK-NEXT:    i32.or
220; CHECK-NEXT:    local.get 0
221; CHECK-NEXT:    i8x16.extract_lane_u 5
222; CHECK-NEXT:    i32.const 1
223; CHECK-NEXT:    i32.and
224; CHECK-NEXT:    i32.const 5
225; CHECK-NEXT:    i32.shl
226; CHECK-NEXT:    i32.or
227; CHECK-NEXT:    local.get 0
228; CHECK-NEXT:    i8x16.extract_lane_u 6
229; CHECK-NEXT:    i32.const 1
230; CHECK-NEXT:    i32.and
231; CHECK-NEXT:    i32.const 6
232; CHECK-NEXT:    i32.shl
233; CHECK-NEXT:    i32.or
234; CHECK-NEXT:    local.get 0
235; CHECK-NEXT:    i8x16.extract_lane_u 7
236; CHECK-NEXT:    i32.const 1
237; CHECK-NEXT:    i32.and
238; CHECK-NEXT:    i32.const 7
239; CHECK-NEXT:    i32.shl
240; CHECK-NEXT:    i32.or
241; CHECK-NEXT:    local.get 0
242; CHECK-NEXT:    i8x16.extract_lane_u 8
243; CHECK-NEXT:    i32.const 1
244; CHECK-NEXT:    i32.and
245; CHECK-NEXT:    i32.const 8
246; CHECK-NEXT:    i32.shl
247; CHECK-NEXT:    i32.or
248; CHECK-NEXT:    local.get 0
249; CHECK-NEXT:    i8x16.extract_lane_u 9
250; CHECK-NEXT:    i32.const 1
251; CHECK-NEXT:    i32.and
252; CHECK-NEXT:    i32.const 9
253; CHECK-NEXT:    i32.shl
254; CHECK-NEXT:    i32.or
255; CHECK-NEXT:    local.get 0
256; CHECK-NEXT:    i8x16.extract_lane_u 10
257; CHECK-NEXT:    i32.const 1
258; CHECK-NEXT:    i32.and
259; CHECK-NEXT:    i32.const 10
260; CHECK-NEXT:    i32.shl
261; CHECK-NEXT:    i32.or
262; CHECK-NEXT:    local.get 0
263; CHECK-NEXT:    i8x16.extract_lane_u 11
264; CHECK-NEXT:    i32.const 1
265; CHECK-NEXT:    i32.and
266; CHECK-NEXT:    i32.const 11
267; CHECK-NEXT:    i32.shl
268; CHECK-NEXT:    i32.or
269; CHECK-NEXT:    local.get 0
270; CHECK-NEXT:    i8x16.extract_lane_u 12
271; CHECK-NEXT:    i32.const 1
272; CHECK-NEXT:    i32.and
273; CHECK-NEXT:    i32.const 12
274; CHECK-NEXT:    i32.shl
275; CHECK-NEXT:    i32.or
276; CHECK-NEXT:    local.get 0
277; CHECK-NEXT:    i8x16.extract_lane_u 13
278; CHECK-NEXT:    i32.const 1
279; CHECK-NEXT:    i32.and
280; CHECK-NEXT:    i32.const 13
281; CHECK-NEXT:    i32.shl
282; CHECK-NEXT:    i32.or
283; CHECK-NEXT:    local.get 0
284; CHECK-NEXT:    i8x16.extract_lane_u 14
285; CHECK-NEXT:    i32.const 1
286; CHECK-NEXT:    i32.and
287; CHECK-NEXT:    i32.const 14
288; CHECK-NEXT:    i32.shl
289; CHECK-NEXT:    i32.or
290; CHECK-NEXT:    local.get 0
291; CHECK-NEXT:    i8x16.extract_lane_u 15
292; CHECK-NEXT:    i32.const 15
293; CHECK-NEXT:    i32.shl
294; CHECK-NEXT:    i32.or
295; CHECK-NEXT:    i32.const 65535
296; CHECK-NEXT:    i32.and
297; CHECK-NEXT:    local.get 1
298; CHECK-NEXT:    local.get 2
299; CHECK-NEXT:    i8x16.eq
300; CHECK-NEXT:    local.tee 0
301; CHECK-NEXT:    i8x16.extract_lane_u 15
302; CHECK-NEXT:    i32.const 31
303; CHECK-NEXT:    i32.shl
304; CHECK-NEXT:    local.get 0
305; CHECK-NEXT:    i8x16.extract_lane_u 14
306; CHECK-NEXT:    i32.const 1
307; CHECK-NEXT:    i32.and
308; CHECK-NEXT:    i32.const 30
309; CHECK-NEXT:    i32.shl
310; CHECK-NEXT:    local.get 0
311; CHECK-NEXT:    i8x16.extract_lane_u 13
312; CHECK-NEXT:    i32.const 1
313; CHECK-NEXT:    i32.and
314; CHECK-NEXT:    i32.const 29
315; CHECK-NEXT:    i32.shl
316; CHECK-NEXT:    local.get 0
317; CHECK-NEXT:    i8x16.extract_lane_u 12
318; CHECK-NEXT:    i32.const 1
319; CHECK-NEXT:    i32.and
320; CHECK-NEXT:    i32.const 28
321; CHECK-NEXT:    i32.shl
322; CHECK-NEXT:    local.get 0
323; CHECK-NEXT:    i8x16.extract_lane_u 11
324; CHECK-NEXT:    i32.const 1
325; CHECK-NEXT:    i32.and
326; CHECK-NEXT:    i32.const 27
327; CHECK-NEXT:    i32.shl
328; CHECK-NEXT:    local.get 0
329; CHECK-NEXT:    i8x16.extract_lane_u 10
330; CHECK-NEXT:    i32.const 1
331; CHECK-NEXT:    i32.and
332; CHECK-NEXT:    i32.const 26
333; CHECK-NEXT:    i32.shl
334; CHECK-NEXT:    local.get 0
335; CHECK-NEXT:    i8x16.extract_lane_u 9
336; CHECK-NEXT:    i32.const 1
337; CHECK-NEXT:    i32.and
338; CHECK-NEXT:    i32.const 25
339; CHECK-NEXT:    i32.shl
340; CHECK-NEXT:    local.get 0
341; CHECK-NEXT:    i8x16.extract_lane_u 8
342; CHECK-NEXT:    i32.const 1
343; CHECK-NEXT:    i32.and
344; CHECK-NEXT:    i32.const 24
345; CHECK-NEXT:    i32.shl
346; CHECK-NEXT:    local.get 0
347; CHECK-NEXT:    i8x16.extract_lane_u 7
348; CHECK-NEXT:    i32.const 1
349; CHECK-NEXT:    i32.and
350; CHECK-NEXT:    i32.const 23
351; CHECK-NEXT:    i32.shl
352; CHECK-NEXT:    local.get 0
353; CHECK-NEXT:    i8x16.extract_lane_u 6
354; CHECK-NEXT:    i32.const 1
355; CHECK-NEXT:    i32.and
356; CHECK-NEXT:    i32.const 22
357; CHECK-NEXT:    i32.shl
358; CHECK-NEXT:    local.get 0
359; CHECK-NEXT:    i8x16.extract_lane_u 5
360; CHECK-NEXT:    i32.const 1
361; CHECK-NEXT:    i32.and
362; CHECK-NEXT:    i32.const 21
363; CHECK-NEXT:    i32.shl
364; CHECK-NEXT:    local.get 0
365; CHECK-NEXT:    i8x16.extract_lane_u 4
366; CHECK-NEXT:    i32.const 1
367; CHECK-NEXT:    i32.and
368; CHECK-NEXT:    i32.const 20
369; CHECK-NEXT:    i32.shl
370; CHECK-NEXT:    local.get 0
371; CHECK-NEXT:    i8x16.extract_lane_u 3
372; CHECK-NEXT:    i32.const 1
373; CHECK-NEXT:    i32.and
374; CHECK-NEXT:    i32.const 19
375; CHECK-NEXT:    i32.shl
376; CHECK-NEXT:    local.get 0
377; CHECK-NEXT:    i8x16.extract_lane_u 2
378; CHECK-NEXT:    i32.const 1
379; CHECK-NEXT:    i32.and
380; CHECK-NEXT:    i32.const 18
381; CHECK-NEXT:    i32.shl
382; CHECK-NEXT:    local.get 0
383; CHECK-NEXT:    i8x16.extract_lane_u 1
384; CHECK-NEXT:    i32.const 1
385; CHECK-NEXT:    i32.and
386; CHECK-NEXT:    i32.const 17
387; CHECK-NEXT:    i32.shl
388; CHECK-NEXT:    local.get 0
389; CHECK-NEXT:    i8x16.extract_lane_u 0
390; CHECK-NEXT:    i32.const 1
391; CHECK-NEXT:    i32.and
392; CHECK-NEXT:    i32.const 16
393; CHECK-NEXT:    i32.shl
394; CHECK-NEXT:    i32.or
395; CHECK-NEXT:    i32.or
396; CHECK-NEXT:    i32.or
397; CHECK-NEXT:    i32.or
398; CHECK-NEXT:    i32.or
399; CHECK-NEXT:    i32.or
400; CHECK-NEXT:    i32.or
401; CHECK-NEXT:    i32.or
402; CHECK-NEXT:    i32.or
403; CHECK-NEXT:    i32.or
404; CHECK-NEXT:    i32.or
405; CHECK-NEXT:    i32.or
406; CHECK-NEXT:    i32.or
407; CHECK-NEXT:    i32.or
408; CHECK-NEXT:    i32.or
409; CHECK-NEXT:    i32.or
410; CHECK-NEXT:    # fallthrough-return
411  %cmp = icmp eq <32 x i8> %v, zeroinitializer
412  %bitmask = bitcast <32 x i1> %cmp to i32
413  ret i32 %bitmask
414}
415