xref: /llvm-project/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll (revision eff6b642583ace53aaed7947b92a43bcba283866)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5; Basic tests from input vector to bitmask
6; IR generated from clang for:
7; __builtin_convertvector + reinterpret_cast<uint16&>
8
9; CHECK-GI:       warning: Instruction selection used fallback path for convert_to_bitmask2
10; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
11; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
12; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
13; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat
14
; Compares every lane of a <16 x i8> vector against zero (icmp ne) and bitcasts
; the resulting <16 x i1> to an i16 bitmask.
; The SelectionDAG run is expected to mask with a constant-pool vector and
; reduce with addv; the GlobalISel run is expected to extract each lane with
; umov and assemble the bits in a general-purpose register.
15define i16 @convert_to_bitmask16(<16 x i8> %vec) {
16; Bits used in mask
17; CHECK-SD-LABEL: convert_to_bitmask16:
18; CHECK-SD:       ; %bb.0:
19; CHECK-SD-NEXT:    adrp x8, lCPI0_0@PAGE
20; CHECK-SD-NEXT:    cmeq.16b v0, v0, #0
21; CHECK-SD-NEXT:    ldr q1, [x8, lCPI0_0@PAGEOFF]
22; CHECK-SD-NEXT:    bic.16b v0, v1, v0
23; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
24; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
25; CHECK-SD-NEXT:    addv.8h h0, v0
26; CHECK-SD-NEXT:    fmov w0, s0
27; CHECK-SD-NEXT:    ret
28;
29; CHECK-GI-LABEL: convert_to_bitmask16:
30; CHECK-GI:       ; %bb.0:
31; CHECK-GI-NEXT:    sub sp, sp, #16
32; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
33; CHECK-GI-NEXT:    cmeq.16b v0, v0, #0
34; CHECK-GI-NEXT:    mvn.16b v0, v0
35; CHECK-GI-NEXT:    umov.b w8, v0[1]
36; CHECK-GI-NEXT:    umov.b w9, v0[0]
37; CHECK-GI-NEXT:    umov.b w10, v0[2]
38; CHECK-GI-NEXT:    umov.b w11, v0[3]
39; CHECK-GI-NEXT:    and w8, w8, #0x1
40; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
41; CHECK-GI-NEXT:    and w8, w10, #0x1
42; CHECK-GI-NEXT:    umov.b w10, v0[4]
43; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
44; CHECK-GI-NEXT:    and w9, w11, #0x1
45; CHECK-GI-NEXT:    umov.b w11, v0[5]
46; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
47; CHECK-GI-NEXT:    and w9, w10, #0x1
48; CHECK-GI-NEXT:    umov.b w10, v0[6]
49; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
50; CHECK-GI-NEXT:    and w9, w11, #0x1
51; CHECK-GI-NEXT:    umov.b w11, v0[7]
52; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
53; CHECK-GI-NEXT:    and w9, w10, #0x1
54; CHECK-GI-NEXT:    umov.b w10, v0[8]
55; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
56; CHECK-GI-NEXT:    and w9, w11, #0x1
57; CHECK-GI-NEXT:    umov.b w11, v0[9]
58; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
59; CHECK-GI-NEXT:    and w9, w10, #0x1
60; CHECK-GI-NEXT:    umov.b w10, v0[10]
61; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #8
62; CHECK-GI-NEXT:    and w9, w11, #0x1
63; CHECK-GI-NEXT:    umov.b w11, v0[11]
64; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #9
65; CHECK-GI-NEXT:    and w9, w10, #0x1
66; CHECK-GI-NEXT:    umov.b w10, v0[12]
67; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #10
68; CHECK-GI-NEXT:    and w9, w11, #0x1
69; CHECK-GI-NEXT:    umov.b w11, v0[13]
70; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #11
71; CHECK-GI-NEXT:    and w9, w10, #0x1
72; CHECK-GI-NEXT:    umov.b w10, v0[14]
73; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #12
74; CHECK-GI-NEXT:    and w9, w11, #0x1
75; CHECK-GI-NEXT:    umov.b w11, v0[15]
76; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #13
77; CHECK-GI-NEXT:    and w9, w10, #0x1
78; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #14
79; CHECK-GI-NEXT:    and w9, w11, #0x1
80; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #15
81; CHECK-GI-NEXT:    strh w8, [sp, #14]
82; CHECK-GI-NEXT:    and w0, w8, #0xffff
83; CHECK-GI-NEXT:    add sp, sp, #16
84; CHECK-GI-NEXT:    ret
85
86; Actual conversion
87
88  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
89  %bitmask = bitcast <16 x i1> %cmp_result to i16
90  ret i16 %bitmask
91}
92
; Compares every lane of a <8 x i16> vector against zero (icmp ne), bitcasts
; the <8 x i1> result to i8, and zero-extends that to the i16 return type.
; The SelectionDAG expectations end with an `and ..., #0xff` after the addv
; reduction; the GlobalISel expectations build the byte lane by lane.
93define i16 @convert_to_bitmask8(<8 x i16> %vec) {
94; CHECK-SD-LABEL: convert_to_bitmask8:
95; CHECK-SD:       ; %bb.0:
96; CHECK-SD-NEXT:    adrp x8, lCPI1_0@PAGE
97; CHECK-SD-NEXT:    cmeq.8h v0, v0, #0
98; CHECK-SD-NEXT:    ldr q1, [x8, lCPI1_0@PAGEOFF]
99; CHECK-SD-NEXT:    bic.16b v0, v1, v0
100; CHECK-SD-NEXT:    addv.8h h0, v0
101; CHECK-SD-NEXT:    fmov w8, s0
102; CHECK-SD-NEXT:    and w0, w8, #0xff
103; CHECK-SD-NEXT:    ret
104;
105; CHECK-GI-LABEL: convert_to_bitmask8:
106; CHECK-GI:       ; %bb.0:
107; CHECK-GI-NEXT:    sub sp, sp, #16
108; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
109; CHECK-GI-NEXT:    cmeq.8h v0, v0, #0
110; CHECK-GI-NEXT:    mvn.16b v0, v0
111; CHECK-GI-NEXT:    xtn.8b v0, v0
112; CHECK-GI-NEXT:    umov.b w8, v0[1]
113; CHECK-GI-NEXT:    umov.b w9, v0[0]
114; CHECK-GI-NEXT:    umov.b w10, v0[2]
115; CHECK-GI-NEXT:    umov.b w11, v0[3]
116; CHECK-GI-NEXT:    and w8, w8, #0x1
117; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
118; CHECK-GI-NEXT:    and w8, w10, #0x1
119; CHECK-GI-NEXT:    umov.b w10, v0[4]
120; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
121; CHECK-GI-NEXT:    and w9, w11, #0x1
122; CHECK-GI-NEXT:    umov.b w11, v0[5]
123; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
124; CHECK-GI-NEXT:    and w9, w10, #0x1
125; CHECK-GI-NEXT:    umov.b w10, v0[6]
126; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
127; CHECK-GI-NEXT:    and w9, w11, #0x1
128; CHECK-GI-NEXT:    umov.b w11, v0[7]
129; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
130; CHECK-GI-NEXT:    and w9, w10, #0x1
131; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
132; CHECK-GI-NEXT:    and w9, w11, #0x1
133; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
134; CHECK-GI-NEXT:    strb w8, [sp, #15]
135; CHECK-GI-NEXT:    and w0, w8, #0xff
136; CHECK-GI-NEXT:    add sp, sp, #16
137; CHECK-GI-NEXT:    ret
138
139
140  %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
141  %bitmask = bitcast <8 x i1> %cmp_result to i8
142  %extended_bitmask = zext i8 %bitmask to i16
143  ret i16 %extended_bitmask
144}
145
; Compares every lane of a <4 x i32> vector against zero (icmp ne) and bitcasts
; the <4 x i1> result to an i4 bitmask.
; The SelectionDAG expectations reduce with addv.4s; the GlobalISel
; expectations move each 32-bit lane to a GPR and merge the low bits.
146define i4 @convert_to_bitmask4(<4 x i32> %vec) {
147; CHECK-SD-LABEL: convert_to_bitmask4:
148; CHECK-SD:       ; %bb.0:
149; CHECK-SD-NEXT:    adrp x8, lCPI2_0@PAGE
150; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
151; CHECK-SD-NEXT:    ldr q1, [x8, lCPI2_0@PAGEOFF]
152; CHECK-SD-NEXT:    bic.16b v0, v1, v0
153; CHECK-SD-NEXT:    addv.4s s0, v0
154; CHECK-SD-NEXT:    fmov w0, s0
155; CHECK-SD-NEXT:    ret
156;
157; CHECK-GI-LABEL: convert_to_bitmask4:
158; CHECK-GI:       ; %bb.0:
159; CHECK-GI-NEXT:    sub sp, sp, #16
160; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
161; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
162; CHECK-GI-NEXT:    mvn.16b v0, v0
163; CHECK-GI-NEXT:    mov.s w8, v0[1]
164; CHECK-GI-NEXT:    mov.s w9, v0[2]
165; CHECK-GI-NEXT:    fmov w11, s0
166; CHECK-GI-NEXT:    mov.s w10, v0[3]
167; CHECK-GI-NEXT:    and w8, w8, #0x1
168; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
169; CHECK-GI-NEXT:    and w8, w9, #0x1
170; CHECK-GI-NEXT:    and w9, w10, #0x1
171; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
172; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
173; CHECK-GI-NEXT:    strb w8, [sp, #15]
174; CHECK-GI-NEXT:    and w0, w8, #0xff
175; CHECK-GI-NEXT:    add sp, sp, #16
176; CHECK-GI-NEXT:    ret
177
178
179  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
180  %bitmask = bitcast <4 x i1> %cmp_result to i4
181  ret i4 %bitmask
182}
183
; Compares both lanes of a <2 x i64> vector against zero (icmp ne), bitcasts
; the <2 x i1> result to i2, and zero-extends it to the i8 return type.
; Both runs share a single set of expectations here; the warning list at the
; top of the file records that GlobalISel uses the fallback path for this
; function.
184define i8 @convert_to_bitmask2(<2 x i64> %vec) {
185; CHECK-LABEL: convert_to_bitmask2:
186; CHECK:       ; %bb.0:
187; CHECK-NEXT:    adrp x8, lCPI3_0@PAGE
188; CHECK-NEXT:    cmeq.2d v0, v0, #0
189; CHECK-NEXT:    ldr q1, [x8, lCPI3_0@PAGEOFF]
190; CHECK-NEXT:    bic.16b v0, v1, v0
191; CHECK-NEXT:    addp.2d d0, v0
192; CHECK-NEXT:    fmov w8, s0
193; CHECK-NEXT:    and w0, w8, #0x3
194; CHECK-NEXT:    ret
195
196
197  %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
198  %bitmask = bitcast <2 x i1> %cmp_result to i2
199  %extended_bitmask = zext i2 %bitmask to i8
200  ret i8 %extended_bitmask
201}
202
203; Clang's __builtin_convertvector adds an undef vector concat for vectors with <8 elements.
; The <4 x i1> compare result is widened to <8 x i1> with a shufflevector: mask
; lanes 0-3 select the compare result, and lanes 4-7 are don't-care elements.
; The don't-care lanes are spelled `poison` (matching the poison second operand)
; rather than the deprecated `undef`; both denote an unspecified mask element.
; Both runs share a single set of expectations; the warning list at the top of
; the file records that GlobalISel uses the fallback path for this function.
204define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) {
205; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4:
206; CHECK:       ; %bb.0:
207; CHECK-NEXT:    adrp x8, lCPI4_0@PAGE
208; CHECK-NEXT:    cmeq.4s v0, v0, #0
209; CHECK-NEXT:    ldr q1, [x8, lCPI4_0@PAGEOFF]
210; CHECK-NEXT:    bic.16b v0, v1, v0
211; CHECK-NEXT:    addv.4s s0, v0
212; CHECK-NEXT:    fmov w0, s0
213; CHECK-NEXT:    ret
214
215
216  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
217  %vector_pad = shufflevector <4 x i1> %cmp_result, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
218  %bitmask = bitcast <8 x i1> %vector_pad to i8
219  ret i8 %bitmask
220}
221
222
; Bitmask conversion without any compare: the two <4 x i32> inputs are ANDed,
; truncated to <4 x i1>, and the result is bitcast to i4.
; The SelectionDAG expectations materialise the lane bit with shl #31 followed
; by cmlt before masking and reducing with addv.
223define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
224; CHECK-SD-LABEL: convert_to_bitmask_no_compare:
225; CHECK-SD:       ; %bb.0:
226; CHECK-SD-NEXT:    and.16b v0, v0, v1
227; CHECK-SD-NEXT:    adrp x8, lCPI5_0@PAGE
228; CHECK-SD-NEXT:    ldr q1, [x8, lCPI5_0@PAGEOFF]
229; CHECK-SD-NEXT:    shl.4s v0, v0, #31
230; CHECK-SD-NEXT:    cmlt.4s v0, v0, #0
231; CHECK-SD-NEXT:    and.16b v0, v0, v1
232; CHECK-SD-NEXT:    addv.4s s0, v0
233; CHECK-SD-NEXT:    fmov w0, s0
234; CHECK-SD-NEXT:    ret
235;
236; CHECK-GI-LABEL: convert_to_bitmask_no_compare:
237; CHECK-GI:       ; %bb.0:
238; CHECK-GI-NEXT:    sub sp, sp, #16
239; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
240; CHECK-GI-NEXT:    and.16b v0, v0, v1
241; CHECK-GI-NEXT:    mov.s w8, v0[1]
242; CHECK-GI-NEXT:    mov.s w9, v0[2]
243; CHECK-GI-NEXT:    fmov w11, s0
244; CHECK-GI-NEXT:    mov.s w10, v0[3]
245; CHECK-GI-NEXT:    and w8, w8, #0x1
246; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
247; CHECK-GI-NEXT:    and w8, w9, #0x1
248; CHECK-GI-NEXT:    and w9, w10, #0x1
249; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
250; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
251; CHECK-GI-NEXT:    strb w8, [sp, #15]
252; CHECK-GI-NEXT:    and w0, w8, #0xff
253; CHECK-GI-NEXT:    add sp, sp, #16
254; CHECK-GI-NEXT:    ret
255
256
257  %cmp = and <4 x i32> %vec1, %vec2
258  %trunc = trunc <4 x i32> %cmp to <4 x i1>
259  %bitmask = bitcast <4 x i1> %trunc to i4
260  ret i4 %bitmask
261}
262
; Bitmask conversion fed by a chain of two compares: (%vec1 != 0) AND
; (%vec1 == %vec2), both on <4 x i32>, with the <4 x i1> result bitcast to i4.
; Both runs are expected to fold the two compares into cmeq + cmeq + bic.
263define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
264; CHECK-SD-LABEL: convert_to_bitmask_with_compare_chain:
265; CHECK-SD:       ; %bb.0:
266; CHECK-SD-NEXT:    cmeq.4s v2, v0, #0
267; CHECK-SD-NEXT:    cmeq.4s v0, v0, v1
268; CHECK-SD-NEXT:    adrp x8, lCPI6_0@PAGE
269; CHECK-SD-NEXT:    ldr q1, [x8, lCPI6_0@PAGEOFF]
270; CHECK-SD-NEXT:    bic.16b v0, v0, v2
271; CHECK-SD-NEXT:    and.16b v0, v0, v1
272; CHECK-SD-NEXT:    addv.4s s0, v0
273; CHECK-SD-NEXT:    fmov w0, s0
274; CHECK-SD-NEXT:    ret
275;
276; CHECK-GI-LABEL: convert_to_bitmask_with_compare_chain:
277; CHECK-GI:       ; %bb.0:
278; CHECK-GI-NEXT:    sub sp, sp, #16
279; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
280; CHECK-GI-NEXT:    cmeq.4s v2, v0, #0
281; CHECK-GI-NEXT:    cmeq.4s v0, v0, v1
282; CHECK-GI-NEXT:    bic.16b v0, v0, v2
283; CHECK-GI-NEXT:    mov.s w8, v0[1]
284; CHECK-GI-NEXT:    mov.s w9, v0[2]
285; CHECK-GI-NEXT:    fmov w11, s0
286; CHECK-GI-NEXT:    mov.s w10, v0[3]
287; CHECK-GI-NEXT:    and w8, w8, #0x1
288; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
289; CHECK-GI-NEXT:    and w8, w9, #0x1
290; CHECK-GI-NEXT:    and w9, w10, #0x1
291; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
292; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
293; CHECK-GI-NEXT:    strb w8, [sp, #15]
294; CHECK-GI-NEXT:    and w0, w8, #0xff
295; CHECK-GI-NEXT:    add sp, sp, #16
296; CHECK-GI-NEXT:    ret
297
298
299  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
300  %cmp2 = icmp eq <4 x i32> %vec1, %vec2
301  %cmp3 = and <4 x i1> %cmp1, %cmp2
302  %bitmask = bitcast <4 x i1> %cmp3 to i4
303  ret i4 %bitmask
304}
305
; Bitmask conversion where the <4 x i1> chain mixes a compare with a truncate:
; (%vec1 != 0) is ANDed with %vec2 truncated to <4 x i1>, then bitcast to i4.
; The SelectionDAG expectations re-derive the lane bit with shl #31 + cmlt
; after the bic, because the truncated operand is not an all-ones/zero mask.
306define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
307; CHECK-SD-LABEL: convert_to_bitmask_with_trunc_in_chain:
308; CHECK-SD:       ; %bb.0:
309; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
310; CHECK-SD-NEXT:    adrp x8, lCPI7_0@PAGE
311; CHECK-SD-NEXT:    bic.16b v0, v1, v0
312; CHECK-SD-NEXT:    ldr q1, [x8, lCPI7_0@PAGEOFF]
313; CHECK-SD-NEXT:    shl.4s v0, v0, #31
314; CHECK-SD-NEXT:    cmlt.4s v0, v0, #0
315; CHECK-SD-NEXT:    and.16b v0, v0, v1
316; CHECK-SD-NEXT:    addv.4s s0, v0
317; CHECK-SD-NEXT:    fmov w0, s0
318; CHECK-SD-NEXT:    ret
319;
320; CHECK-GI-LABEL: convert_to_bitmask_with_trunc_in_chain:
321; CHECK-GI:       ; %bb.0:
322; CHECK-GI-NEXT:    sub sp, sp, #16
323; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
324; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
325; CHECK-GI-NEXT:    bic.16b v0, v1, v0
326; CHECK-GI-NEXT:    mov.s w8, v0[1]
327; CHECK-GI-NEXT:    mov.s w9, v0[2]
328; CHECK-GI-NEXT:    fmov w11, s0
329; CHECK-GI-NEXT:    mov.s w10, v0[3]
330; CHECK-GI-NEXT:    and w8, w8, #0x1
331; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
332; CHECK-GI-NEXT:    and w8, w9, #0x1
333; CHECK-GI-NEXT:    and w9, w10, #0x1
334; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
335; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
336; CHECK-GI-NEXT:    strb w8, [sp, #15]
337; CHECK-GI-NEXT:    and w0, w8, #0xff
338; CHECK-GI-NEXT:    add sp, sp, #16
339; CHECK-GI-NEXT:    ret
340
341
342  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
343  %trunc_vec = trunc <4 x i32> %vec2 to <4 x i1>
344  %and_res = and <4 x i1> %cmp1, %trunc_vec
345  %bitmask = bitcast <4 x i1> %and_res to i4
346  ret i4 %bitmask
347}
348
; Bitmask conversion at the end of a long <4 x i1> logic chain: two <4 x i32>
; compares are ANDed and then pushed through a sequence of or/xor/and
; operations with constant <4 x i1> vectors before the final bitcast to i4.
; The SelectionDAG expectations carry out the chain on 4h/8b vectors (after
; xtn.4h) and finish with shl #15 + cmlt + and + addv.4h.
349define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
350; CHECK-SD-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
351; CHECK-SD:       ; %bb.0:
352; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
353; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
354; CHECK-SD-NEXT:    adrp x8, lCPI8_0@PAGE
355; CHECK-SD-NEXT:    movi d2, #0x000000ffffffff
356; CHECK-SD-NEXT:    movi d3, #0x00ffffffffffff
357; CHECK-SD-NEXT:    bic.16b v0, v1, v0
358; CHECK-SD-NEXT:    movi d1, #0xffff0000ffff0000
359; CHECK-SD-NEXT:    xtn.4h v0, v0
360; CHECK-SD-NEXT:    orr.8b v0, v0, v2
361; CHECK-SD-NEXT:    movi d2, #0x00ffffffff0000
362; CHECK-SD-NEXT:    eor.8b v1, v0, v1
363; CHECK-SD-NEXT:    eor.8b v0, v0, v2
364; CHECK-SD-NEXT:    mov.h v1[2], wzr
365; CHECK-SD-NEXT:    orr.8b v0, v0, v3
366; CHECK-SD-NEXT:    orr.8b v0, v1, v0
367; CHECK-SD-NEXT:    ldr d1, [x8, lCPI8_0@PAGEOFF]
368; CHECK-SD-NEXT:    shl.4h v0, v0, #15
369; CHECK-SD-NEXT:    cmlt.4h v0, v0, #0
370; CHECK-SD-NEXT:    and.8b v0, v0, v1
371; CHECK-SD-NEXT:    addv.4h h0, v0
372; CHECK-SD-NEXT:    fmov w0, s0
373; CHECK-SD-NEXT:    ret
374;
375; CHECK-GI-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
376; CHECK-GI:       ; %bb.0:
377; CHECK-GI-NEXT:    sub sp, sp, #16
378; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
379; CHECK-GI-NEXT:    mov w8, #1 ; =0x1
380; CHECK-GI-NEXT:    mov w9, #0 ; =0x0
381; CHECK-GI-NEXT:    cmeq.4s v5, v0, #0
382; CHECK-GI-NEXT:    fmov s2, w8
383; CHECK-GI-NEXT:    fmov s4, w9
384; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
385; CHECK-GI-NEXT:    mov.16b v3, v2
386; CHECK-GI-NEXT:    mov.16b v0, v4
387; CHECK-GI-NEXT:    mov.h v4[1], w8
388; CHECK-GI-NEXT:    bic.16b v1, v1, v5
389; CHECK-GI-NEXT:    mov.16b v5, v2
390; CHECK-GI-NEXT:    mov.h v2[1], w8
391; CHECK-GI-NEXT:    mov.h v3[1], w8
392; CHECK-GI-NEXT:    mov.h v0[1], w8
393; CHECK-GI-NEXT:    mov.h v5[1], w8
394; CHECK-GI-NEXT:    mov.h v4[2], w8
395; CHECK-GI-NEXT:    xtn.4h v1, v1
396; CHECK-GI-NEXT:    mov.h v2[2], w8
397; CHECK-GI-NEXT:    mov.h v3[2], w9
398; CHECK-GI-NEXT:    mov.h v0[2], w9
399; CHECK-GI-NEXT:    mov.h v5[2], w9
400; CHECK-GI-NEXT:    mov.h v4[3], w9
401; CHECK-GI-NEXT:    mov.h v2[3], w9
402; CHECK-GI-NEXT:    mov.h v3[3], w9
403; CHECK-GI-NEXT:    mov.h v0[3], w8
404; CHECK-GI-NEXT:    mov.h v5[3], w8
405; CHECK-GI-NEXT:    orr.8b v1, v1, v3
406; CHECK-GI-NEXT:    eor.8b v0, v1, v0
407; CHECK-GI-NEXT:    eor.8b v1, v4, v1
408; CHECK-GI-NEXT:    and.8b v0, v0, v5
409; CHECK-GI-NEXT:    orr.8b v1, v2, v1
410; CHECK-GI-NEXT:    orr.8b v0, v0, v1
411; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
412; CHECK-GI-NEXT:    mov.s w8, v0[1]
413; CHECK-GI-NEXT:    mov.s w9, v0[2]
414; CHECK-GI-NEXT:    fmov w11, s0
415; CHECK-GI-NEXT:    mov.s w10, v0[3]
416; CHECK-GI-NEXT:    and w8, w8, #0x1
417; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
418; CHECK-GI-NEXT:    and w8, w9, #0x1
419; CHECK-GI-NEXT:    and w9, w10, #0x1
420; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
421; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
422; CHECK-GI-NEXT:    strb w8, [sp, #15]
423; CHECK-GI-NEXT:    and w0, w8, #0xff
424; CHECK-GI-NEXT:    add sp, sp, #16
425; CHECK-GI-NEXT:    ret
426
427
428  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
429  %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer
430
431  ; Artificially make this a long chain to hide the original type
432  %chain1 = and <4 x i1> %cmp1, %cmp2;
433  %chain2 = or <4 x i1> %chain1, <i1 1, i1 1, i1 0, i1 0>;
434  %chain3 = xor <4 x i1> %chain2, <i1 0, i1 1, i1 0, i1 1>;
435  %chain4 = and <4 x i1> %chain3, <i1 1, i1 1, i1 0, i1 1>;
436  %chain5 = or <4 x i1> %chain4, <i1 1, i1 1, i1 1, i1 0>;
437  %chain6 = xor <4 x i1> <i1 0, i1 1, i1 1, i1 0>, %chain2;
438  %chain7 = or <4 x i1> %chain5, %chain6;
439  %bitmask = bitcast <4 x i1> %chain7 to i4
440  ret i4 %bitmask
441}
442
; Bitmask conversion where the chain combines compares of different element
; widths: (%vec1 != 0) on <4 x i16> ORed with (%vec2 == 0) on <4 x i32>, then
; bitcast to i4.
; Both runs are expected to narrow the 32-bit compare with xtn.4h and combine
; the two results with orn.8b.
443define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 x i32> %vec2) {
444; CHECK-SD-LABEL: convert_to_bitmask_with_different_types_in_chain:
445; CHECK-SD:       ; %bb.0:
446; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
447; CHECK-SD-NEXT:    cmeq.4h v0, v0, #0
448; CHECK-SD-NEXT:    adrp x8, lCPI9_0@PAGE
449; CHECK-SD-NEXT:    xtn.4h v1, v1
450; CHECK-SD-NEXT:    orn.8b v0, v1, v0
451; CHECK-SD-NEXT:    ldr d1, [x8, lCPI9_0@PAGEOFF]
452; CHECK-SD-NEXT:    and.8b v0, v0, v1
453; CHECK-SD-NEXT:    addv.4h h0, v0
454; CHECK-SD-NEXT:    fmov w0, s0
455; CHECK-SD-NEXT:    ret
456;
457; CHECK-GI-LABEL: convert_to_bitmask_with_different_types_in_chain:
458; CHECK-GI:       ; %bb.0:
459; CHECK-GI-NEXT:    sub sp, sp, #16
460; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
461; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
462; CHECK-GI-NEXT:    cmeq.4h v0, v0, #0
463; CHECK-GI-NEXT:    xtn.4h v1, v1
464; CHECK-GI-NEXT:    orn.8b v0, v1, v0
465; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
466; CHECK-GI-NEXT:    mov.s w8, v0[1]
467; CHECK-GI-NEXT:    mov.s w9, v0[2]
468; CHECK-GI-NEXT:    fmov w11, s0
469; CHECK-GI-NEXT:    mov.s w10, v0[3]
470; CHECK-GI-NEXT:    and w8, w8, #0x1
471; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
472; CHECK-GI-NEXT:    and w8, w9, #0x1
473; CHECK-GI-NEXT:    and w9, w10, #0x1
474; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
475; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
476; CHECK-GI-NEXT:    strb w8, [sp, #15]
477; CHECK-GI-NEXT:    and w0, w8, #0xff
478; CHECK-GI-NEXT:    add sp, sp, #16
479; CHECK-GI-NEXT:    ret
480
481
482  %cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer
483  %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer
484  %chain1 = or <4 x i1> %cmp1, %cmp2
485  %bitmask = bitcast <4 x i1> %chain1 to i4
486  ret i4 %bitmask
487}
488
; Bitmask conversion of a <16 x i1> argument with no visible producer: the
; argument is bitcast straight to i16.
; The SelectionDAG expectations start with shl #7 + cmlt on 16b lanes to turn
; bit 0 of each lane into a full-lane mask before masking and reducing.
489define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
490; CHECK-SD-LABEL: convert_to_bitmask_without_knowing_type:
491; CHECK-SD:       ; %bb.0:
492; CHECK-SD-NEXT:    shl.16b v0, v0, #7
493; CHECK-SD-NEXT:    adrp x8, lCPI10_0@PAGE
494; CHECK-SD-NEXT:    ldr q1, [x8, lCPI10_0@PAGEOFF]
495; CHECK-SD-NEXT:    cmlt.16b v0, v0, #0
496; CHECK-SD-NEXT:    and.16b v0, v0, v1
497; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
498; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
499; CHECK-SD-NEXT:    addv.8h h0, v0
500; CHECK-SD-NEXT:    fmov w0, s0
501; CHECK-SD-NEXT:    ret
502;
503; CHECK-GI-LABEL: convert_to_bitmask_without_knowing_type:
504; CHECK-GI:       ; %bb.0:
505; CHECK-GI-NEXT:    sub sp, sp, #16
506; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
507; CHECK-GI-NEXT:    umov.b w8, v0[1]
508; CHECK-GI-NEXT:    umov.b w9, v0[0]
509; CHECK-GI-NEXT:    umov.b w10, v0[2]
510; CHECK-GI-NEXT:    umov.b w11, v0[3]
511; CHECK-GI-NEXT:    and w8, w8, #0x1
512; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
513; CHECK-GI-NEXT:    and w8, w10, #0x1
514; CHECK-GI-NEXT:    umov.b w10, v0[4]
515; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
516; CHECK-GI-NEXT:    and w9, w11, #0x1
517; CHECK-GI-NEXT:    umov.b w11, v0[5]
518; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
519; CHECK-GI-NEXT:    and w9, w10, #0x1
520; CHECK-GI-NEXT:    umov.b w10, v0[6]
521; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
522; CHECK-GI-NEXT:    and w9, w11, #0x1
523; CHECK-GI-NEXT:    umov.b w11, v0[7]
524; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
525; CHECK-GI-NEXT:    and w9, w10, #0x1
526; CHECK-GI-NEXT:    umov.b w10, v0[8]
527; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
528; CHECK-GI-NEXT:    and w9, w11, #0x1
529; CHECK-GI-NEXT:    umov.b w11, v0[9]
530; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
531; CHECK-GI-NEXT:    and w9, w10, #0x1
532; CHECK-GI-NEXT:    umov.b w10, v0[10]
533; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #8
534; CHECK-GI-NEXT:    and w9, w11, #0x1
535; CHECK-GI-NEXT:    umov.b w11, v0[11]
536; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #9
537; CHECK-GI-NEXT:    and w9, w10, #0x1
538; CHECK-GI-NEXT:    umov.b w10, v0[12]
539; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #10
540; CHECK-GI-NEXT:    and w9, w11, #0x1
541; CHECK-GI-NEXT:    umov.b w11, v0[13]
542; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #11
543; CHECK-GI-NEXT:    and w9, w10, #0x1
544; CHECK-GI-NEXT:    umov.b w10, v0[14]
545; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #12
546; CHECK-GI-NEXT:    and w9, w11, #0x1
547; CHECK-GI-NEXT:    umov.b w11, v0[15]
548; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #13
549; CHECK-GI-NEXT:    and w9, w10, #0x1
550; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #14
551; CHECK-GI-NEXT:    and w9, w11, #0x1
552; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #15
553; CHECK-GI-NEXT:    strh w8, [sp, #14]
554; CHECK-GI-NEXT:    and w0, w8, #0xffff
555; CHECK-GI-NEXT:    add sp, sp, #16
556; CHECK-GI-NEXT:    ret
557
558  %bitmask = bitcast <16 x i1> %vec to i16
559  ret i16 %bitmask
560}
561
; Compares both lanes of a 64-bit <2 x i32> vector against zero (icmp ne) and
; bitcasts the <2 x i1> result to i2.
; Both runs share a single set of expectations (8b/2s operations and an
; addp.2s reduction); the warning list at the top of the file records that
; GlobalISel uses the fallback path for this function.
562define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
563; CHECK-LABEL: convert_to_bitmask_2xi32:
564; CHECK:       ; %bb.0:
565; CHECK-NEXT:    adrp x8, lCPI11_0@PAGE
566; CHECK-NEXT:    cmeq.2s v0, v0, #0
567; CHECK-NEXT:    ldr d1, [x8, lCPI11_0@PAGEOFF]
568; CHECK-NEXT:    bic.8b v0, v1, v0
569; CHECK-NEXT:    addp.2s v0, v0, v0
570; CHECK-NEXT:    fmov w0, s0
571; CHECK-NEXT:    ret
572
573  %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
574  %bitmask = bitcast <2 x i1> %cmp_result to i2
575  ret i2 %bitmask
576}
577
; Compares every lane of a <4 x i8> vector against zero (icmp ne) and bitcasts
; the <4 x i1> result to i4.
; The SelectionDAG expectations first clear the upper byte of each 16-bit lane
; (bic.4h #255, lsl #8) and then compare on 4h lanes.
578define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) {
579; CHECK-SD-LABEL: convert_to_bitmask_4xi8:
580; CHECK-SD:       ; %bb.0:
581; CHECK-SD-NEXT:    bic.4h v0, #255, lsl #8
582; CHECK-SD-NEXT:    adrp x8, lCPI12_0@PAGE
583; CHECK-SD-NEXT:    ldr d1, [x8, lCPI12_0@PAGEOFF]
584; CHECK-SD-NEXT:    cmeq.4h v0, v0, #0
585; CHECK-SD-NEXT:    bic.8b v0, v1, v0
586; CHECK-SD-NEXT:    addv.4h h0, v0
587; CHECK-SD-NEXT:    fmov w0, s0
588; CHECK-SD-NEXT:    ret
589;
590; CHECK-GI-LABEL: convert_to_bitmask_4xi8:
591; CHECK-GI:       ; %bb.0:
592; CHECK-GI-NEXT:    sub sp, sp, #16
593; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
594; CHECK-GI-NEXT:    mov w8, #0 ; =0x0
595; CHECK-GI-NEXT:    uzp1.8b v0, v0, v0
596; CHECK-GI-NEXT:    fmov s1, w8
597; CHECK-GI-NEXT:    mov.b v1[1], w8
598; CHECK-GI-NEXT:    mov.b v1[2], w8
599; CHECK-GI-NEXT:    mov.b v1[3], w8
600; CHECK-GI-NEXT:    cmeq.8b v0, v0, v1
601; CHECK-GI-NEXT:    mvn.8b v0, v0
602; CHECK-GI-NEXT:    umov.b w8, v0[0]
603; CHECK-GI-NEXT:    umov.b w9, v0[1]
604; CHECK-GI-NEXT:    mov.s v1[0], w8
605; CHECK-GI-NEXT:    umov.b w8, v0[2]
606; CHECK-GI-NEXT:    mov.s v1[1], w9
607; CHECK-GI-NEXT:    umov.b w9, v0[3]
608; CHECK-GI-NEXT:    mov.s v1[2], w8
609; CHECK-GI-NEXT:    mov.s v1[3], w9
610; CHECK-GI-NEXT:    mov.s w8, v1[1]
611; CHECK-GI-NEXT:    mov.s w9, v1[2]
612; CHECK-GI-NEXT:    fmov w11, s1
613; CHECK-GI-NEXT:    mov.s w10, v1[3]
614; CHECK-GI-NEXT:    and w8, w8, #0x1
615; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
616; CHECK-GI-NEXT:    and w8, w9, #0x1
617; CHECK-GI-NEXT:    and w9, w10, #0x1
618; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
619; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
620; CHECK-GI-NEXT:    strb w8, [sp, #15]
621; CHECK-GI-NEXT:    and w0, w8, #0xff
622; CHECK-GI-NEXT:    add sp, sp, #16
623; CHECK-GI-NEXT:    ret
624
625  %cmp_result = icmp ne <4 x i8> %vec, zeroinitializer
626  %bitmask = bitcast <4 x i1> %cmp_result to i4
627  ret i4 %bitmask
628}
629
; Compares every lane of a <8 x i2> vector against zero (icmp ne) and bitcasts
; the <8 x i1> result to i8.
; The shared expectations mask each byte lane with 3 (movi.8b #3 + and.8b)
; before the compare, so only the two low bits of each lane take part.
; The warning list at the top of the file records that GlobalISel uses the
; fallback path for this function.
630define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) {
631; CHECK-LABEL: convert_to_bitmask_8xi2:
632; CHECK:       ; %bb.0:
633; CHECK-NEXT:    movi.8b v1, #3
634; CHECK-NEXT:    adrp x8, lCPI13_0@PAGE
635; CHECK-NEXT:    and.8b v0, v0, v1
636; CHECK-NEXT:    ldr d1, [x8, lCPI13_0@PAGEOFF]
637; CHECK-NEXT:    cmeq.8b v0, v0, #0
638; CHECK-NEXT:    bic.8b v0, v1, v0
639; CHECK-NEXT:    addv.8b b0, v0
640; CHECK-NEXT:    fmov w0, s0
641; CHECK-NEXT:    ret
642
643  %cmp_result = icmp ne <8 x i2> %vec, zeroinitializer
644  %bitmask = bitcast <8 x i1> %cmp_result to i8
645  ret i8 %bitmask
646}
647
; Floating-point variant: compares every lane of a <4 x float> vector against
; zero with `fcmp one` and bitcasts the <4 x i1> result to i4.
; Both runs are expected to lower the compare as fcmgt OR fcmlt against #0.0.
648define i4 @convert_to_bitmask_float(<4 x float> %vec) {
649; CHECK-SD-LABEL: convert_to_bitmask_float:
650; CHECK-SD:       ; %bb.0:
651; CHECK-SD-NEXT:    fcmgt.4s v1, v0, #0.0
652; CHECK-SD-NEXT:    fcmlt.4s v0, v0, #0.0
653; CHECK-SD-NEXT:    adrp x8, lCPI14_0@PAGE
654; CHECK-SD-NEXT:    orr.16b v0, v0, v1
655; CHECK-SD-NEXT:    ldr q1, [x8, lCPI14_0@PAGEOFF]
656; CHECK-SD-NEXT:    and.16b v0, v0, v1
657; CHECK-SD-NEXT:    addv.4s s0, v0
658; CHECK-SD-NEXT:    fmov w0, s0
659; CHECK-SD-NEXT:    ret
660;
661; CHECK-GI-LABEL: convert_to_bitmask_float:
662; CHECK-GI:       ; %bb.0:
663; CHECK-GI-NEXT:    sub sp, sp, #16
664; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
665; CHECK-GI-NEXT:    fcmgt.4s v1, v0, #0.0
666; CHECK-GI-NEXT:    fcmlt.4s v0, v0, #0.0
667; CHECK-GI-NEXT:    orr.16b v0, v0, v1
668; CHECK-GI-NEXT:    mov.s w8, v0[1]
669; CHECK-GI-NEXT:    mov.s w9, v0[2]
670; CHECK-GI-NEXT:    fmov w11, s0
671; CHECK-GI-NEXT:    mov.s w10, v0[3]
672; CHECK-GI-NEXT:    and w8, w8, #0x1
673; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
674; CHECK-GI-NEXT:    and w8, w9, #0x1
675; CHECK-GI-NEXT:    and w9, w10, #0x1
676; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
677; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
678; CHECK-GI-NEXT:    strb w8, [sp, #15]
679; CHECK-GI-NEXT:    and w0, w8, #0xff
680; CHECK-GI-NEXT:    add sp, sp, #16
681; CHECK-GI-NEXT:    ret
682
683
684  %cmp_result = fcmp one <4 x float> %vec, zeroinitializer
685  %bitmask = bitcast <4 x i1> %cmp_result to i4
686  ret i4 %bitmask
687}
688
689; Larger vector types don't map directly, but they can be split/truncated and then converted.
690; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
; The <8 x i32> input arrives in two 128-bit registers (v0, v1); both runs are
; expected to compare each half with cmeq.4s and join the halves with uzp1.8h.
691define i8 @convert_large_vector(<8 x i32> %vec) {
692; CHECK-SD-LABEL: convert_large_vector:
693; CHECK-SD:       ; %bb.0:
694; CHECK-SD-NEXT:    sub sp, sp, #16
695; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
696; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
697; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
698; CHECK-SD-NEXT:    adrp x8, lCPI15_0@PAGE
699; CHECK-SD-NEXT:    uzp1.8h v0, v0, v1
700; CHECK-SD-NEXT:    ldr q1, [x8, lCPI15_0@PAGEOFF]
701; CHECK-SD-NEXT:    bic.16b v0, v1, v0
702; CHECK-SD-NEXT:    addv.8h h0, v0
703; CHECK-SD-NEXT:    fmov w8, s0
704; CHECK-SD-NEXT:    and w0, w8, #0xff
705; CHECK-SD-NEXT:    add sp, sp, #16
706; CHECK-SD-NEXT:    ret
707;
708; CHECK-GI-LABEL: convert_large_vector:
709; CHECK-GI:       ; %bb.0:
710; CHECK-GI-NEXT:    sub sp, sp, #16
711; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
712; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
713; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
714; CHECK-GI-NEXT:    mvn.16b v0, v0
715; CHECK-GI-NEXT:    mvn.16b v1, v1
716; CHECK-GI-NEXT:    uzp1.8h v0, v0, v1
717; CHECK-GI-NEXT:    xtn.8b v0, v0
718; CHECK-GI-NEXT:    umov.b w8, v0[1]
719; CHECK-GI-NEXT:    umov.b w9, v0[0]
720; CHECK-GI-NEXT:    umov.b w10, v0[2]
721; CHECK-GI-NEXT:    umov.b w11, v0[3]
722; CHECK-GI-NEXT:    and w8, w8, #0x1
723; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
724; CHECK-GI-NEXT:    and w8, w10, #0x1
725; CHECK-GI-NEXT:    umov.b w10, v0[4]
726; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
727; CHECK-GI-NEXT:    and w9, w11, #0x1
728; CHECK-GI-NEXT:    umov.b w11, v0[5]
729; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
730; CHECK-GI-NEXT:    and w9, w10, #0x1
731; CHECK-GI-NEXT:    umov.b w10, v0[6]
732; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
733; CHECK-GI-NEXT:    and w9, w11, #0x1
734; CHECK-GI-NEXT:    umov.b w11, v0[7]
735; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
736; CHECK-GI-NEXT:    and w9, w10, #0x1
737; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
738; CHECK-GI-NEXT:    and w9, w11, #0x1
739; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
740; CHECK-GI-NEXT:    strb w8, [sp, #15]
741; CHECK-GI-NEXT:    and w0, w8, #0xff
742; CHECK-GI-NEXT:    add sp, sp, #16
743; CHECK-GI-NEXT:    ret
744
745
746   %cmp_result = icmp ne <8 x i32> %vec, zeroinitializer
747   %bitmask = bitcast <8 x i1> %cmp_result to i8
748   ret i8 %bitmask
749}
750
; Compares every lane of a <4 x i22> vector (a non-power-of-two element width)
; against zero (icmp ne) and bitcasts the <4 x i1> result to i4.
; Both runs are expected to work on 32-bit lanes with a mask constant built by
; `movi.4s #63, msl #16`; SelectionDAG folds mask and compare into cmtst.4s.
751define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
752; CHECK-SD-LABEL: convert_legalized_illegal_element_size:
753; CHECK-SD:       ; %bb.0:
754; CHECK-SD-NEXT:    movi.4s v1, #63, msl #16
755; CHECK-SD-NEXT:    adrp x8, lCPI16_0@PAGE
756; CHECK-SD-NEXT:    cmtst.4s v0, v0, v1
757; CHECK-SD-NEXT:    ldr d1, [x8, lCPI16_0@PAGEOFF]
758; CHECK-SD-NEXT:    xtn.4h v0, v0
759; CHECK-SD-NEXT:    and.8b v0, v0, v1
760; CHECK-SD-NEXT:    addv.4h h0, v0
761; CHECK-SD-NEXT:    fmov w0, s0
762; CHECK-SD-NEXT:    ret
763;
764; CHECK-GI-LABEL: convert_legalized_illegal_element_size:
765; CHECK-GI:       ; %bb.0:
766; CHECK-GI-NEXT:    sub sp, sp, #16
767; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
768; CHECK-GI-NEXT:    movi.4s v1, #63, msl #16
769; CHECK-GI-NEXT:    and.16b v0, v0, v1
770; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
771; CHECK-GI-NEXT:    mvn.16b v0, v0
772; CHECK-GI-NEXT:    mov.s w8, v0[1]
773; CHECK-GI-NEXT:    mov.s w9, v0[2]
774; CHECK-GI-NEXT:    fmov w11, s0
775; CHECK-GI-NEXT:    mov.s w10, v0[3]
776; CHECK-GI-NEXT:    and w8, w8, #0x1
777; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
778; CHECK-GI-NEXT:    and w8, w9, #0x1
779; CHECK-GI-NEXT:    and w9, w10, #0x1
780; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
781; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
782; CHECK-GI-NEXT:    strb w8, [sp, #15]
783; CHECK-GI-NEXT:    and w0, w8, #0xff
784; CHECK-GI-NEXT:    add sp, sp, #16
785; CHECK-GI-NEXT:    ret
786
787  %cmp_result = icmp ne <4 x i22> %vec, zeroinitializer
788  %bitmask = bitcast <8 x i1> %cmp_result to i4
789  ret i4 %bitmask
790}
791
792; This may still be converted as a v8i8 after the vector concat (but not as v4iX).
; Here the compare result is placed in the HIGH four lanes of the padded
; <8 x i1> (mask lanes 4-7 select the second operand); the low four lanes are
; don't-care elements. They are spelled `poison` (matching the poison first
; operand) rather than the deprecated `undef`; both denote an unspecified
; mask element.
; The shared expectations insert the four compare lanes into byte lanes 4-7
; one at a time before converting on 8b lanes. The warning list at the top of
; the file records that GlobalISel uses the fallback path for this function.
793define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
794; CHECK-LABEL: no_direct_convert_for_bad_concat:
795; CHECK:       ; %bb.0:
796; CHECK-NEXT:    cmtst.4s v0, v0, v0
797; CHECK-NEXT:    adrp x8, lCPI17_0@PAGE
798; CHECK-NEXT:    xtn.4h v0, v0
799; CHECK-NEXT:    umov.h w9, v0[0]
800; CHECK-NEXT:    mov.b v1[4], w9
801; CHECK-NEXT:    umov.h w9, v0[1]
802; CHECK-NEXT:    mov.b v1[5], w9
803; CHECK-NEXT:    umov.h w9, v0[2]
804; CHECK-NEXT:    mov.b v1[6], w9
805; CHECK-NEXT:    umov.h w9, v0[3]
806; CHECK-NEXT:    mov.b v1[7], w9
807; CHECK-NEXT:    shl.8b v0, v1, #7
808; CHECK-NEXT:    ldr d1, [x8, lCPI17_0@PAGEOFF]
809; CHECK-NEXT:    cmlt.8b v0, v0, #0
810; CHECK-NEXT:    and.8b v0, v0, v1
811; CHECK-NEXT:    addv.8b b0, v0
812; CHECK-NEXT:    fmov w0, s0
813; CHECK-NEXT:    ret
814
815  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
816  %vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 5, i32 6, i32 7>
817  %bitmask = bitcast <8 x i1> %vector_pad to i8
818  ret i8 %bitmask
819}
820
821define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
822; CHECK-SD-LABEL: no_convert_without_direct_bitcast:
823; CHECK-SD:       ; %bb.0:
824; CHECK-SD-NEXT:    cmtst.8h v0, v0, v0
825; CHECK-SD-NEXT:    xtn.8b v0, v0
826; CHECK-SD-NEXT:    ret
827;
828; CHECK-GI-LABEL: no_convert_without_direct_bitcast:
829; CHECK-GI:       ; %bb.0:
830; CHECK-GI-NEXT:    cmeq.8h v0, v0, #0
831; CHECK-GI-NEXT:    mvn.16b v0, v0
832; CHECK-GI-NEXT:    xtn.8b v0, v0
833; CHECK-GI-NEXT:    ret
834
; Test input: the <8 x i1> result of a lane-wise "not equal to zero" compare is
; returned as a vector; there is no bitcast to a scalar in this function.
; Accordingly, the expected output for both runs is just a compare followed by
; an xtn narrowing to 8 bytes, with no constant-pool mask load and no addv
; reduction.
835   %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
836   ret <8 x i1> %cmp_result
837}
838
839define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
840; CHECK-SD-LABEL: no_combine_illegal_num_elements:
841; CHECK-SD:       ; %bb.0:
842; CHECK-SD-NEXT:    sub sp, sp, #16
843; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
844; CHECK-SD-NEXT:    fmov s0, w0
845; CHECK-SD-NEXT:    fmov s1, w4
846; CHECK-SD-NEXT:    mov.s v0[1], w1
847; CHECK-SD-NEXT:    mov.s v1[1], w5
848; CHECK-SD-NEXT:    mov.s v0[2], w2
849; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
850; CHECK-SD-NEXT:    mov.s v0[3], w3
851; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
852; CHECK-SD-NEXT:    uzp1.8h v0, v0, v1
853; CHECK-SD-NEXT:    mvn.16b v0, v0
854; CHECK-SD-NEXT:    xtn.8b v0, v0
855; CHECK-SD-NEXT:    umov.b w8, v0[0]
856; CHECK-SD-NEXT:    umov.b w9, v0[1]
857; CHECK-SD-NEXT:    umov.b w10, v0[2]
858; CHECK-SD-NEXT:    and w8, w8, #0x1
859; CHECK-SD-NEXT:    bfi w8, w9, #1, #1
860; CHECK-SD-NEXT:    umov.b w9, v0[3]
861; CHECK-SD-NEXT:    bfi w8, w10, #2, #1
862; CHECK-SD-NEXT:    umov.b w10, v0[4]
863; CHECK-SD-NEXT:    bfi w8, w9, #3, #1
864; CHECK-SD-NEXT:    umov.b w9, v0[5]
865; CHECK-SD-NEXT:    bfi w8, w10, #4, #1
866; CHECK-SD-NEXT:    orr w8, w8, w9, lsl #5
867; CHECK-SD-NEXT:    and w0, w8, #0x3f
868; CHECK-SD-NEXT:    add sp, sp, #16
869; CHECK-SD-NEXT:    ret
870;
871; CHECK-GI-LABEL: no_combine_illegal_num_elements:
872; CHECK-GI:       ; %bb.0:
873; CHECK-GI-NEXT:    sub sp, sp, #16
874; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
875; CHECK-GI-NEXT:    mov.s v0[0], w0
876; CHECK-GI-NEXT:    mov.s v1[0], w4
877; CHECK-GI-NEXT:    mov.s v2[0], wzr
878; CHECK-GI-NEXT:    mov.s v0[1], w1
879; CHECK-GI-NEXT:    mov.s v1[1], w5
880; CHECK-GI-NEXT:    mov.s v2[1], wzr
881; CHECK-GI-NEXT:    mov.s v0[2], w2
882; CHECK-GI-NEXT:    cmeq.4s v1, v1, v2
883; CHECK-GI-NEXT:    mvn.16b v1, v1
884; CHECK-GI-NEXT:    mov.s v0[3], w3
885; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
886; CHECK-GI-NEXT:    mvn.16b v0, v0
887; CHECK-GI-NEXT:    mov.s w8, v0[1]
888; CHECK-GI-NEXT:    mov.s w9, v0[2]
889; CHECK-GI-NEXT:    mov.s w10, v0[3]
890; CHECK-GI-NEXT:    mov.h v0[1], w8
891; CHECK-GI-NEXT:    mov.s w8, v1[1]
892; CHECK-GI-NEXT:    mov.h v0[2], w9
893; CHECK-GI-NEXT:    mov.h v0[3], w10
894; CHECK-GI-NEXT:    mov.h v0[4], v1[0]
895; CHECK-GI-NEXT:    mov.h v0[5], w8
896; CHECK-GI-NEXT:    umov.h w8, v0[1]
897; CHECK-GI-NEXT:    umov.h w9, v0[0]
898; CHECK-GI-NEXT:    umov.h w10, v0[2]
899; CHECK-GI-NEXT:    umov.h w11, v0[3]
900; CHECK-GI-NEXT:    and w8, w8, #0x1
901; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
902; CHECK-GI-NEXT:    and w8, w10, #0x1
903; CHECK-GI-NEXT:    umov.h w10, v0[4]
904; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
905; CHECK-GI-NEXT:    and w9, w11, #0x1
906; CHECK-GI-NEXT:    umov.h w11, v0[5]
907; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
908; CHECK-GI-NEXT:    and w9, w10, #0x1
909; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
910; CHECK-GI-NEXT:    and w9, w11, #0x1
911; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
912; CHECK-GI-NEXT:    and w8, w8, #0x3f
913; CHECK-GI-NEXT:    strb w8, [sp, #15]
914; CHECK-GI-NEXT:    and w0, w8, #0xff
915; CHECK-GI-NEXT:    add sp, sp, #16
916; CHECK-GI-NEXT:    ret
917
; Test input: a six-lane compare (<6 x i32> against zero) whose <6 x i1> result
; is bitcast to an i6 scalar. As the function name says, the bitmask combine is
; not expected to apply to this lane count: in both runs the expected output
; arrives in w0-w5, extracts the six compare lanes one at a time, merges them
; bit by bit with bfi/orr, and finally masks the result to six bits (#0x3f).
918  %cmp_result = icmp ne <6 x i32> %vec, zeroinitializer
919  %bitmask = bitcast <6 x i1> %cmp_result to i6
920  ret i6 %bitmask
921}
922
923; Only apply the combine when casting a vector to a scalar.
924define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
925; CHECK-SD-LABEL: vector_to_vector_cast:
926; CHECK-SD:       ; %bb.0:
927; CHECK-SD-NEXT:    sub sp, sp, #16
928; CHECK-SD-NEXT:    shl.16b v0, v0, #7
929; CHECK-SD-NEXT:    adrp x8, lCPI20_0@PAGE
930; CHECK-SD-NEXT:    ldr q1, [x8, lCPI20_0@PAGEOFF]
931; CHECK-SD-NEXT:    add x8, sp, #14
932; CHECK-SD-NEXT:    cmlt.16b v0, v0, #0
933; CHECK-SD-NEXT:    and.16b v0, v0, v1
934; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
935; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
936; CHECK-SD-NEXT:    addv.8h h0, v0
937; CHECK-SD-NEXT:    str h0, [sp, #14]
938; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x8]
939; CHECK-SD-NEXT:    orr x8, x8, #0x1
940; CHECK-SD-NEXT:    ld1.b { v0 }[4], [x8]
941; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
942; CHECK-SD-NEXT:    add sp, sp, #16
943; CHECK-SD-NEXT:    ret
944;
945; CHECK-GI-LABEL: vector_to_vector_cast:
946; CHECK-GI:       ; %bb.0:
947; CHECK-GI-NEXT:    sub sp, sp, #16
948; CHECK-GI-NEXT:    umov.b w8, v0[1]
949; CHECK-GI-NEXT:    mov d1, v0[1]
950; CHECK-GI-NEXT:    umov.b w10, v0[1]
951; CHECK-GI-NEXT:    umov.b w9, v0[0]
952; CHECK-GI-NEXT:    umov.b w13, v0[0]
953; CHECK-GI-NEXT:    umov.b w14, v0[2]
954; CHECK-GI-NEXT:    umov.b w15, v0[3]
955; CHECK-GI-NEXT:    umov.b w11, v0[2]
956; CHECK-GI-NEXT:    umov.b w16, v0[4]
957; CHECK-GI-NEXT:    umov.b w17, v0[5]
958; CHECK-GI-NEXT:    umov.b w12, v0[3]
959; CHECK-GI-NEXT:    and w8, w8, #0x1
960; CHECK-GI-NEXT:    and w10, w10, #0x1
961; CHECK-GI-NEXT:    umov.b w0, v1[1]
962; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
963; CHECK-GI-NEXT:    bfi w13, w10, #1, #31
964; CHECK-GI-NEXT:    and w14, w14, #0x1
965; CHECK-GI-NEXT:    umov.b w8, v1[0]
966; CHECK-GI-NEXT:    umov.b w10, v1[2]
967; CHECK-GI-NEXT:    and w15, w15, #0x1
968; CHECK-GI-NEXT:    orr w13, w13, w14, lsl #2
969; CHECK-GI-NEXT:    umov.b w14, v1[3]
970; CHECK-GI-NEXT:    and w11, w11, #0x1
971; CHECK-GI-NEXT:    and w0, w0, #0x1
972; CHECK-GI-NEXT:    and w16, w16, #0x1
973; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #2
974; CHECK-GI-NEXT:    orr w13, w13, w15, lsl #3
975; CHECK-GI-NEXT:    umov.b w15, v1[4]
976; CHECK-GI-NEXT:    umov.b w11, v0[6]
977; CHECK-GI-NEXT:    bfi w8, w0, #1, #31
978; CHECK-GI-NEXT:    and w10, w10, #0x1
979; CHECK-GI-NEXT:    and w17, w17, #0x1
980; CHECK-GI-NEXT:    orr w13, w13, w16, lsl #4
981; CHECK-GI-NEXT:    and w14, w14, #0x1
982; CHECK-GI-NEXT:    umov.b w0, v0[7]
983; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #2
984; CHECK-GI-NEXT:    umov.b w10, v1[5]
985; CHECK-GI-NEXT:    umov.b w16, v1[6]
986; CHECK-GI-NEXT:    orr w13, w13, w17, lsl #5
987; CHECK-GI-NEXT:    umov.b w17, v0[4]
988; CHECK-GI-NEXT:    and w15, w15, #0x1
989; CHECK-GI-NEXT:    orr w8, w8, w14, lsl #3
990; CHECK-GI-NEXT:    and w12, w12, #0x1
991; CHECK-GI-NEXT:    and w11, w11, #0x1
992; CHECK-GI-NEXT:    umov.b w14, v1[7]
993; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #3
994; CHECK-GI-NEXT:    orr w11, w13, w11, lsl #6
995; CHECK-GI-NEXT:    orr w8, w8, w15, lsl #4
996; CHECK-GI-NEXT:    umov.b w15, v0[5]
997; CHECK-GI-NEXT:    and w10, w10, #0x1
998; CHECK-GI-NEXT:    and w0, w0, #0x1
999; CHECK-GI-NEXT:    and w12, w17, #0x1
1000; CHECK-GI-NEXT:    umov.b w13, v0[1]
1001; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #5
1002; CHECK-GI-NEXT:    and w16, w16, #0x1
1003; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #4
1004; CHECK-GI-NEXT:    umov.b w10, v0[0]
1005; CHECK-GI-NEXT:    orr w11, w11, w0, lsl #7
1006; CHECK-GI-NEXT:    and w14, w14, #0x1
1007; CHECK-GI-NEXT:    and w12, w15, #0x1
1008; CHECK-GI-NEXT:    umov.b w15, v0[2]
1009; CHECK-GI-NEXT:    orr w8, w8, w16, lsl #6
1010; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #5
1011; CHECK-GI-NEXT:    umov.b w12, v0[6]
1012; CHECK-GI-NEXT:    strb w11, [sp, #8]
1013; CHECK-GI-NEXT:    and w11, w13, #0x1
1014; CHECK-GI-NEXT:    umov.b w13, v0[3]
1015; CHECK-GI-NEXT:    orr w8, w8, w14, lsl #7
1016; CHECK-GI-NEXT:    umov.b w14, v0[7]
1017; CHECK-GI-NEXT:    ldr b0, [sp, #8]
1018; CHECK-GI-NEXT:    bfi w10, w11, #1, #31
1019; CHECK-GI-NEXT:    and w11, w15, #0x1
1020; CHECK-GI-NEXT:    strb w8, [sp, #9]
1021; CHECK-GI-NEXT:    umov.b w15, v0[4]
1022; CHECK-GI-NEXT:    and w8, w12, #0x1
1023; CHECK-GI-NEXT:    orr w10, w10, w11, lsl #2
1024; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #6
1025; CHECK-GI-NEXT:    and w9, w13, #0x1
1026; CHECK-GI-NEXT:    umov.b w11, v0[1]
1027; CHECK-GI-NEXT:    orr w9, w10, w9, lsl #3
1028; CHECK-GI-NEXT:    umov.b w10, v0[5]
1029; CHECK-GI-NEXT:    umov.b w12, v0[0]
1030; CHECK-GI-NEXT:    and w13, w14, #0x1
1031; CHECK-GI-NEXT:    umov.b w16, v0[2]
1032; CHECK-GI-NEXT:    umov.b w17, v0[3]
1033; CHECK-GI-NEXT:    and w14, w15, #0x1
1034; CHECK-GI-NEXT:    umov.b w15, v0[2]
1035; CHECK-GI-NEXT:    orr w8, w8, w13, lsl #7
1036; CHECK-GI-NEXT:    orr w9, w9, w14, lsl #4
1037; CHECK-GI-NEXT:    umov.b w13, v0[6]
1038; CHECK-GI-NEXT:    and w11, w11, #0x1
1039; CHECK-GI-NEXT:    umov.b w14, v0[3]
1040; CHECK-GI-NEXT:    strb w8, [sp, #10]
1041; CHECK-GI-NEXT:    and w8, w10, #0x1
1042; CHECK-GI-NEXT:    bfi w12, w11, #1, #31
1043; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #5
1044; CHECK-GI-NEXT:    umov.b w10, v0[4]
1045; CHECK-GI-NEXT:    and w9, w15, #0x1
1046; CHECK-GI-NEXT:    umov.b w11, v0[7]
1047; CHECK-GI-NEXT:    umov.b w15, v0[1]
1048; CHECK-GI-NEXT:    orr w9, w12, w9, lsl #2
1049; CHECK-GI-NEXT:    umov.b w12, v0[5]
1050; CHECK-GI-NEXT:    and w13, w13, #0x1
1051; CHECK-GI-NEXT:    and w14, w14, #0x1
1052; CHECK-GI-NEXT:    orr w8, w8, w13, lsl #6
1053; CHECK-GI-NEXT:    umov.b w13, v0[0]
1054; CHECK-GI-NEXT:    orr w9, w9, w14, lsl #3
1055; CHECK-GI-NEXT:    and w10, w10, #0x1
1056; CHECK-GI-NEXT:    umov.b w14, v0[6]
1057; CHECK-GI-NEXT:    and w11, w11, #0x1
1058; CHECK-GI-NEXT:    and w15, w15, #0x1
1059; CHECK-GI-NEXT:    umov.b w0, v0[3]
1060; CHECK-GI-NEXT:    orr w9, w9, w10, lsl #4
1061; CHECK-GI-NEXT:    and w10, w12, #0x1
1062; CHECK-GI-NEXT:    umov.b w12, v0[7]
1063; CHECK-GI-NEXT:    orr w8, w8, w11, lsl #7
1064; CHECK-GI-NEXT:    bfi w13, w15, #1, #31
1065; CHECK-GI-NEXT:    and w11, w16, #0x1
1066; CHECK-GI-NEXT:    orr w9, w9, w10, lsl #5
1067; CHECK-GI-NEXT:    and w10, w14, #0x1
1068; CHECK-GI-NEXT:    umov.b w14, v0[4]
1069; CHECK-GI-NEXT:    strb w8, [sp, #11]
1070; CHECK-GI-NEXT:    umov.b w15, v0[1]
1071; CHECK-GI-NEXT:    umov.b w16, v0[3]
1072; CHECK-GI-NEXT:    orr w8, w9, w10, lsl #6
1073; CHECK-GI-NEXT:    orr w9, w13, w11, lsl #2
1074; CHECK-GI-NEXT:    and w10, w12, #0x1
1075; CHECK-GI-NEXT:    and w11, w17, #0x1
1076; CHECK-GI-NEXT:    umov.b w12, v0[5]
1077; CHECK-GI-NEXT:    umov.b w17, v0[0]
1078; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #7
1079; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #3
1080; CHECK-GI-NEXT:    umov.b w10, v0[1]
1081; CHECK-GI-NEXT:    and w11, w14, #0x1
1082; CHECK-GI-NEXT:    umov.b w14, v0[0]
1083; CHECK-GI-NEXT:    and w15, w15, #0x1
1084; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #4
1085; CHECK-GI-NEXT:    umov.b w11, v0[2]
1086; CHECK-GI-NEXT:    umov.b w13, v0[6]
1087; CHECK-GI-NEXT:    and w12, w12, #0x1
1088; CHECK-GI-NEXT:    bfi w17, w15, #1, #31
1089; CHECK-GI-NEXT:    umov.b w15, v0[5]
1090; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #5
1091; CHECK-GI-NEXT:    and w10, w10, #0x1
1092; CHECK-GI-NEXT:    umov.b w12, v0[2]
1093; CHECK-GI-NEXT:    bfi w14, w10, #1, #31
1094; CHECK-GI-NEXT:    umov.b w10, v0[4]
1095; CHECK-GI-NEXT:    ldr b1, [sp, #9]
1096; CHECK-GI-NEXT:    and w11, w11, #0x1
1097; CHECK-GI-NEXT:    and w13, w13, #0x1
1098; CHECK-GI-NEXT:    strb w8, [sp, #12]
1099; CHECK-GI-NEXT:    orr w11, w14, w11, lsl #2
1100; CHECK-GI-NEXT:    and w14, w16, #0x1
1101; CHECK-GI-NEXT:    umov.b w16, v0[4]
1102; CHECK-GI-NEXT:    and w12, w12, #0x1
1103; CHECK-GI-NEXT:    and w15, w15, #0x1
1104; CHECK-GI-NEXT:    orr w9, w9, w13, lsl #6
1105; CHECK-GI-NEXT:    orr w11, w11, w14, lsl #3
1106; CHECK-GI-NEXT:    orr w12, w17, w12, lsl #2
1107; CHECK-GI-NEXT:    and w10, w10, #0x1
1108; CHECK-GI-NEXT:    and w17, w0, #0x1
1109; CHECK-GI-NEXT:    umov.b w0, v0[5]
1110; CHECK-GI-NEXT:    umov.b w14, v0[6]
1111; CHECK-GI-NEXT:    orr w10, w11, w10, lsl #4
1112; CHECK-GI-NEXT:    orr w12, w12, w17, lsl #3
1113; CHECK-GI-NEXT:    umov.b w11, v0[7]
1114; CHECK-GI-NEXT:    and w16, w16, #0x1
1115; CHECK-GI-NEXT:    umov.b w17, v0[6]
1116; CHECK-GI-NEXT:    orr w10, w10, w15, lsl #5
1117; CHECK-GI-NEXT:    umov.b w15, v0[7]
1118; CHECK-GI-NEXT:    orr w12, w12, w16, lsl #4
1119; CHECK-GI-NEXT:    and w16, w0, #0x1
1120; CHECK-GI-NEXT:    umov.b w0, v0[7]
1121; CHECK-GI-NEXT:    and w14, w14, #0x1
1122; CHECK-GI-NEXT:    orr w12, w12, w16, lsl #5
1123; CHECK-GI-NEXT:    orr w10, w10, w14, lsl #6
1124; CHECK-GI-NEXT:    and w11, w11, #0x1
1125; CHECK-GI-NEXT:    and w13, w17, #0x1
1126; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #7
1127; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
1128; CHECK-GI-NEXT:    orr w11, w12, w13, lsl #6
1129; CHECK-GI-NEXT:    and w12, w15, #0x1
1130; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
1131; CHECK-GI-NEXT:    orr w8, w10, w12, lsl #7
1132; CHECK-GI-NEXT:    and w10, w0, #0x1
1133; CHECK-GI-NEXT:    strb w9, [sp, #13]
1134; CHECK-GI-NEXT:    orr w9, w11, w10, lsl #7
1135; CHECK-GI-NEXT:    strb w8, [sp, #14]
1136; CHECK-GI-NEXT:    strb w9, [sp, #15]
1137; CHECK-GI-NEXT:    add sp, sp, #16
1138; CHECK-GI-NEXT:    ret
; Test input: a single vector-to-vector bitcast that reinterprets the sixteen
; i1 lanes of %arg as two i8 lanes; no scalar integer is produced anywhere in
; this function. The function is declared nounwind, and the expected output
; above contains no .cfi directives. The two runs are asserted separately
; because their expected instruction sequences differ.
1139  %bc = bitcast <16 x i1> %arg to <2 x i8>
1140  ret <2 x i8> %bc
1141}
1142