xref: /llvm-project/llvm/test/CodeGen/AArch64/vselect-ext.ll (revision d460c1de3b989cea919b9d60c21644f28f987950)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3
; Select between the plain argument %op and zero on (%a >u splat(10)).
; %a is never extended in this function; the expected code compares at i8
; width (cmhi.16b) and sign-extends the resulting mask to 4s lanes.
4define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) {
5; CHECK-LABEL: no_existing_zext:
6; CHECK:       ; %bb.0: ; %entry
7; CHECK-NEXT:    movi.16b v5, #10
8; CHECK-NEXT:    cmhi.16b v0, v0, v5
9; CHECK-NEXT:    sshll.8h v5, v0, #0
10; CHECK-NEXT:    sshll2.8h v0, v0, #0
11; CHECK-NEXT:    sshll2.4s v16, v0, #0
12; CHECK-NEXT:    sshll.4s v6, v5, #0
13; CHECK-NEXT:    sshll.4s v7, v0, #0
14; CHECK-NEXT:    sshll2.4s v5, v5, #0
15; CHECK-NEXT:    and.16b v4, v4, v16
16; CHECK-NEXT:    and.16b v0, v1, v6
17; CHECK-NEXT:    and.16b v1, v2, v5
18; CHECK-NEXT:    and.16b v2, v3, v7
19; CHECK-NEXT:    mov.16b v3, v4
20; CHECK-NEXT:    ret
21entry:
22  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
23  %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer
24  ret <16 x i32> %sel
25}
26
; zext(%a) feeds the select, but the compare is signed (sgt) and its second
; operand is the non-constant vector %b. The expected code compares at i8
; width (cmgt.16b) and sign-extends the mask.
27define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) {
28; CHECK-LABEL: second_compare_operand_not_splat:
29; CHECK:       ; %bb.0: ; %entry
30; CHECK-NEXT:    cmgt.16b v1, v0, v1
31; CHECK-NEXT:    ushll.8h v2, v0, #0
32; CHECK-NEXT:    ushll2.8h v0, v0, #0
33; CHECK-NEXT:    sshll.8h v3, v1, #0
34; CHECK-NEXT:    sshll2.8h v1, v1, #0
35; CHECK-NEXT:    ushll.4s v4, v2, #0
36; CHECK-NEXT:    ushll.4s v5, v0, #0
37; CHECK-NEXT:    ushll2.4s v2, v2, #0
38; CHECK-NEXT:    ushll2.4s v6, v0, #0
39; CHECK-NEXT:    sshll.4s v0, v3, #0
40; CHECK-NEXT:    sshll.4s v7, v1, #0
41; CHECK-NEXT:    sshll2.4s v16, v3, #0
42; CHECK-NEXT:    sshll2.4s v1, v1, #0
43; CHECK-NEXT:    and.16b v0, v4, v0
44; CHECK-NEXT:    and.16b v3, v6, v1
45; CHECK-NEXT:    and.16b v1, v2, v16
46; CHECK-NEXT:    and.16b v2, v5, v7
47; CHECK-NEXT:    ret
48entry:
49  %ext = zext <16 x i8> %a to <16 x i32>
50  %cmp = icmp sgt <16 x i8> %a, %b
51  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
52  ret <16 x i32> %sel
53}
54
; zext(%a) feeds the select while the compare on %a uses a signed predicate
; (sgt splat(10)). The expected code keeps the compare at i8 width
; (cmgt.16b) and sign-extends the mask.
55define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
56; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select:
57; CHECK:       ; %bb.0: ; %entry
58; CHECK-NEXT:    movi.16b v1, #10
59; CHECK-NEXT:    ushll.8h v2, v0, #0
60; CHECK-NEXT:    ushll.4s v4, v2, #0
61; CHECK-NEXT:    ushll2.4s v2, v2, #0
62; CHECK-NEXT:    cmgt.16b v1, v0, v1
63; CHECK-NEXT:    ushll2.8h v0, v0, #0
64; CHECK-NEXT:    sshll.8h v3, v1, #0
65; CHECK-NEXT:    sshll2.8h v1, v1, #0
66; CHECK-NEXT:    ushll.4s v5, v0, #0
67; CHECK-NEXT:    ushll2.4s v6, v0, #0
68; CHECK-NEXT:    sshll.4s v0, v3, #0
69; CHECK-NEXT:    sshll.4s v7, v1, #0
70; CHECK-NEXT:    sshll2.4s v16, v3, #0
71; CHECK-NEXT:    sshll2.4s v1, v1, #0
72; CHECK-NEXT:    and.16b v0, v4, v0
73; CHECK-NEXT:    and.16b v3, v6, v1
74; CHECK-NEXT:    and.16b v1, v2, v16
75; CHECK-NEXT:    and.16b v2, v5, v7
76; CHECK-NEXT:    ret
77entry:
78  %ext = zext <16 x i8> %a to <16 x i32>
79  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
80  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
81  ret <16 x i32> %sel
82}
83
; zext <8 x i8> -> <8 x i64> feeds the select; the compare is ugt splat(10)
; on the narrow %a. The expected code compares the already-widened 2d lanes
; (cmhi.2d), so no separate mask extension appears.
84define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) {
85; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64:
86; CHECK:       ; %bb.0:
87; CHECK-NEXT:    ushll.8h v0, v0, #0
88; CHECK-NEXT:    mov w8, #10 ; =0xa
89; CHECK-NEXT:    dup.2d v2, x8
90; CHECK-NEXT:    ushll.4s v1, v0, #0
91; CHECK-NEXT:    ushll2.4s v0, v0, #0
92; CHECK-NEXT:    ushll.2d v3, v1, #0
93; CHECK-NEXT:    ushll2.2d v4, v0, #0
94; CHECK-NEXT:    ushll2.2d v1, v1, #0
95; CHECK-NEXT:    ushll.2d v5, v0, #0
96; CHECK-NEXT:    cmhi.2d v0, v3, v2
97; CHECK-NEXT:    cmhi.2d v7, v1, v2
98; CHECK-NEXT:    cmhi.2d v6, v5, v2
99; CHECK-NEXT:    cmhi.2d v2, v4, v2
100; CHECK-NEXT:    and.16b v0, v3, v0
101; CHECK-NEXT:    and.16b v1, v1, v7
102; CHECK-NEXT:    and.16b v3, v4, v2
103; CHECK-NEXT:    and.16b v2, v5, v6
104; CHECK-NEXT:    ret
105  %ext = zext <8 x i8> %a to <8 x i64>
106  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
107  %sel = select <8 x i1> %cmp, <8 x i64> %ext, <8 x i64> zeroinitializer
108  ret <8 x i64> %sel
109}
110
111
; zext <16 x i8> -> <16 x i32> feeds the select; the compare is ugt
; splat(10) on the narrow %a. The expected code compares the widened 4s
; lanes directly (cmhi.4s).
112define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) {
113; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32:
114; CHECK:       ; %bb.0:
115; CHECK-NEXT:    ushll.8h v2, v0, #0
116; CHECK-NEXT:    ushll2.8h v0, v0, #0
117; CHECK-NEXT:    movi.4s v1, #10
118; CHECK-NEXT:    ushll.4s v3, v2, #0
119; CHECK-NEXT:    ushll2.4s v4, v0, #0
120; CHECK-NEXT:    ushll2.4s v2, v2, #0
121; CHECK-NEXT:    ushll.4s v5, v0, #0
122; CHECK-NEXT:    cmhi.4s v0, v3, v1
123; CHECK-NEXT:    cmhi.4s v7, v2, v1
124; CHECK-NEXT:    cmhi.4s v6, v5, v1
125; CHECK-NEXT:    cmhi.4s v1, v4, v1
126; CHECK-NEXT:    and.16b v0, v3, v0
127; CHECK-NEXT:    and.16b v3, v4, v1
128; CHECK-NEXT:    and.16b v1, v2, v7
129; CHECK-NEXT:    and.16b v2, v5, v6
130; CHECK-NEXT:    ret
131  %ext = zext <16 x i8> %a to <16 x i32>
132  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
133  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
134  ret <16 x i32> %sel
135}
136
; zext <8 x i8> -> <8 x i32> feeds the select; the compare is ugt splat(10)
; on the narrow %a. The expected code compares the widened 4s lanes
; (cmhi.4s).
137define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) {
138; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32:
139; CHECK:       ; %bb.0:
140; CHECK-NEXT:    ushll.8h v0, v0, #0
141; CHECK-NEXT:    movi.4s v1, #10
142; CHECK-NEXT:    ushll2.4s v2, v0, #0
143; CHECK-NEXT:    ushll.4s v0, v0, #0
144; CHECK-NEXT:    cmhi.4s v3, v0, v1
145; CHECK-NEXT:    cmhi.4s v1, v2, v1
146; CHECK-NEXT:    and.16b v1, v2, v1
147; CHECK-NEXT:    and.16b v0, v0, v3
148; CHECK-NEXT:    ret
149  %ext = zext <8 x i8> %a to <8 x i32>
150  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
151  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
152  ret <8 x i32> %sel
153}
154
; Same pattern as the <8 x i8> case but starting from <8 x i16>: zext to
; <8 x i32> feeds the select, compare is ugt splat(10) on %a. The expected
; code compares the widened 4s lanes (cmhi.4s).
155define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2(<8 x i16> %a) {
156; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2:
157; CHECK:       ; %bb.0:
158; CHECK-NEXT:    movi.4s v1, #10
159; CHECK-NEXT:    ushll2.4s v2, v0, #0
160; CHECK-NEXT:    ushll.4s v0, v0, #0
161; CHECK-NEXT:    cmhi.4s v3, v0, v1
162; CHECK-NEXT:    cmhi.4s v1, v2, v1
163; CHECK-NEXT:    and.16b v1, v2, v1
164; CHECK-NEXT:    and.16b v0, v0, v3
165; CHECK-NEXT:    ret
166  %ext = zext <8 x i16> %a to <8 x i32>
167  %cmp = icmp ugt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
168  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
169  ret <8 x i32> %sel
170}
171
172
; Source element type is the non-power-of-two i15. zext to <8 x i32> feeds
; the select; compare is ugt splat(10). The expected code first clears the
; top bit of each 16-bit lane (bic.8h #128, lsl #8), then widens and
; compares at 4s (cmhi.4s).
173define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
174; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
175; CHECK:       ; %bb.0:
176; CHECK-NEXT:    bic.8h v0, #128, lsl #8
177; CHECK-NEXT:    movi.4s v1, #10
178; CHECK-NEXT:    ushll2.4s v2, v0, #0
179; CHECK-NEXT:    ushll.4s v0, v0, #0
180; CHECK-NEXT:    cmhi.4s v3, v0, v1
181; CHECK-NEXT:    cmhi.4s v1, v2, v1
182; CHECK-NEXT:    and.16b v1, v2, v1
183; CHECK-NEXT:    and.16b v0, v0, v3
184; CHECK-NEXT:    ret
185  %ext = zext <8 x i15> %a to <8 x i32>
186  %cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
187  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
188  ret <8 x i32> %sel
189}
190
; Odd element count (7). zext <7 x i16> -> <7 x i32> feeds the select;
; compare is ugt splat(10). The expected code compares at i16 width
; (cmhi.8h), sign-extends the mask, and returns the seven lanes in w0-w6.
191define <7 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v7i32(<7 x i16> %a) {
192; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v7i32:
193; CHECK:       ; %bb.0:
194; CHECK-NEXT:    movi.8h v1, #10
195; CHECK-NEXT:    ushll.4s v2, v0, #0
196; CHECK-NEXT:    cmhi.8h v1, v0, v1
197; CHECK-NEXT:    ushll2.4s v0, v0, #0
198; CHECK-NEXT:    sshll.4s v3, v1, #0
199; CHECK-NEXT:    sshll2.4s v1, v1, #0
200; CHECK-NEXT:    and.16b v2, v2, v3
201; CHECK-NEXT:    and.16b v0, v0, v1
202; CHECK-NEXT:    mov.s w1, v2[1]
203; CHECK-NEXT:    mov.s w2, v2[2]
204; CHECK-NEXT:    mov.s w3, v2[3]
205; CHECK-NEXT:    mov.s w5, v0[1]
206; CHECK-NEXT:    mov.s w6, v0[2]
207; CHECK-NEXT:    fmov w0, s2
208; CHECK-NEXT:    fmov w4, s0
209; CHECK-NEXT:    ret
210  %ext = zext <7 x i16> %a to <7 x i32>
211  %cmp = icmp ugt <7 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
212  %sel = select <7 x i1> %cmp, <7 x i32> %ext, <7 x i32> zeroinitializer
213  ret <7 x i32> %sel
214}
215
; Three-element case: zext <3 x i8> -> <3 x i32> feeds the select; compare
; is ugt splat(10). The expected code builds the vector from w0-w2, compares
; at 4h against a constant-pool operand (cmhi.4h), and sign-extends the mask.
; NOTE(review): the function name ends in "v3i16" although the result type
; is <3 x i32>; the name is referenced by CHECK-LABEL, so it is left as is.
216define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8> %a) {
217; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v3i16:
218; CHECK:       ; %bb.0:
219; CHECK-NEXT:    fmov s0, w0
220; CHECK-NEXT:  Lloh0:
221; CHECK-NEXT:    adrp x8, lCPI9_0@PAGE
222; CHECK-NEXT:    movi.2d v3, #0x0000ff000000ff
223; CHECK-NEXT:  Lloh1:
224; CHECK-NEXT:    ldr d2, [x8, lCPI9_0@PAGEOFF]
225; CHECK-NEXT:    mov.h v0[1], w1
226; CHECK-NEXT:    mov.h v0[2], w2
227; CHECK-NEXT:    ushll.4s v1, v0, #0
228; CHECK-NEXT:    bic.4h v0, #255, lsl #8
229; CHECK-NEXT:    cmhi.4h v0, v0, v2
230; CHECK-NEXT:    and.16b v1, v1, v3
231; CHECK-NEXT:    sshll.4s v0, v0, #0
232; CHECK-NEXT:    and.16b v0, v1, v0
233; CHECK-NEXT:    ret
234; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
235  %ext = zext <3 x i8> %a to <3 x i32>
236  %cmp = icmp ugt <3 x i8> %a, <i8 10, i8 10, i8 10>
237  %sel = select <3 x i1> %cmp, <3 x i32> %ext, <3 x i32> zeroinitializer
238  ret <3 x i32> %sel
239}
240
; Single-register case: zext <4 x i16> -> <4 x i32> feeds the select;
; compare is ugt splat(10). The expected code widens once and compares at 4s
; (cmhi.4s).
241define <4 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v4i32(<4 x i16> %a) {
242; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v4i32:
243; CHECK:       ; %bb.0:
244; CHECK-NEXT:    movi.4s v1, #10
245; CHECK-NEXT:    ushll.4s v0, v0, #0
246; CHECK-NEXT:    cmhi.4s v1, v0, v1
247; CHECK-NEXT:    and.16b v0, v0, v1
248; CHECK-NEXT:    ret
249  %ext = zext <4 x i16> %a to <4 x i32>
250  %cmp = icmp ugt <4 x i16> %a, <i16 10, i16 10, i16 10, i16 10>
251  %sel = select <4 x i1> %cmp, <4 x i32> %ext, <4 x i32> zeroinitializer
252  ret <4 x i32> %sel
253}
254
; Two-element case: zext <2 x i16> -> <2 x i32> feeds the select; compare is
; ugt splat(10). The expected code masks each 32-bit lane to its low 16 bits
; (and.8b with 0x0000ffff per lane) and compares at 2s (cmhi.2s).
255define <2 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v2i32(<2 x i16> %a) {
256; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v2i32:
257; CHECK:       ; %bb.0:
258; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
259; CHECK-NEXT:    movi.2s v2, #10
260; CHECK-NEXT:    and.8b v0, v0, v1
261; CHECK-NEXT:    cmhi.2s v1, v0, v2
262; CHECK-NEXT:    and.8b v0, v0, v1
263; CHECK-NEXT:    ret
264  %ext = zext <2 x i16> %a to <2 x i32>
265  %cmp = icmp ugt <2 x i16> %a, <i16 10, i16 10>
266  %sel = select <2 x i1> %cmp, <2 x i32> %ext, <2 x i32> zeroinitializer
267  ret <2 x i32> %sel
268}
269
; Equality predicate: zext <8 x i16> -> <8 x i32> feeds the select; compare
; is eq splat(10) on %a. The expected code compares the widened 4s lanes
; (cmeq.4s).
270define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
271; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32:
272; CHECK:       ; %bb.0:
273; CHECK-NEXT:    movi.4s v1, #10
274; CHECK-NEXT:    ushll2.4s v2, v0, #0
275; CHECK-NEXT:    ushll.4s v0, v0, #0
276; CHECK-NEXT:    cmeq.4s v3, v0, v1
277; CHECK-NEXT:    cmeq.4s v1, v2, v1
278; CHECK-NEXT:    and.16b v1, v2, v1
279; CHECK-NEXT:    and.16b v0, v0, v3
280; CHECK-NEXT:    ret
281  %ext = zext <8 x i16> %a to <8 x i32>
282  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
283  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
284  ret <8 x i32> %sel
285}
286
; Equality predicate with the non-power-of-two source element type i13.
; The expected code clears the top three bits of each 16-bit lane
; (bic.8h #224, lsl #8), widens, and compares at 4s (cmeq.4s).
287define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
288; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
289; CHECK:       ; %bb.0:
290; CHECK-NEXT:    bic.8h v0, #224, lsl #8
291; CHECK-NEXT:    movi.4s v1, #10
292; CHECK-NEXT:    ushll2.4s v2, v0, #0
293; CHECK-NEXT:    ushll.4s v0, v0, #0
294; CHECK-NEXT:    cmeq.4s v3, v0, v1
295; CHECK-NEXT:    cmeq.4s v1, v2, v1
296; CHECK-NEXT:    and.16b v1, v2, v1
297; CHECK-NEXT:    and.16b v0, v0, v3
298; CHECK-NEXT:    ret
299  %ext = zext <8 x i13> %a to <8 x i32>
300  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
301  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
302  ret <8 x i32> %sel
303}
304
; Inequality predicate: zext <16 x i8> -> <16 x i32> feeds the select;
; compare is ne splat(10). The expected code computes cmeq.4s on the widened
; lanes and applies the inverted mask with bic.16b.
; NOTE(review): the name ends in "v8i32" although the vectors here have 16
; elements; the name is referenced by CHECK-LABEL, so it is left as is.
305define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
306; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32:
307; CHECK:       ; %bb.0:
308; CHECK-NEXT:    ushll.8h v2, v0, #0
309; CHECK-NEXT:    ushll2.8h v0, v0, #0
310; CHECK-NEXT:    movi.4s v1, #10
311; CHECK-NEXT:    ushll.4s v3, v2, #0
312; CHECK-NEXT:    ushll2.4s v4, v0, #0
313; CHECK-NEXT:    ushll2.4s v2, v2, #0
314; CHECK-NEXT:    ushll.4s v5, v0, #0
315; CHECK-NEXT:    cmeq.4s v0, v3, v1
316; CHECK-NEXT:    cmeq.4s v7, v2, v1
317; CHECK-NEXT:    cmeq.4s v6, v5, v1
318; CHECK-NEXT:    cmeq.4s v1, v4, v1
319; CHECK-NEXT:    bic.16b v0, v3, v0
320; CHECK-NEXT:    bic.16b v3, v4, v1
321; CHECK-NEXT:    bic.16b v1, v2, v7
322; CHECK-NEXT:    bic.16b v2, v5, v6
323; CHECK-NEXT:    ret
324  %ext = zext <16 x i8> %a to <16 x i32>
325  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
326  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
327  ret <16 x i32> %sel
328}
329
330; A variation of @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32, with
331; multiple users of the compare.
; %cmp (ugt splat(10) on %a) drives two selects: %sel on zext(%a), which is
; returned, and %sel.2 on the <16 x i64> argument %v, which is stored to
; %ptr. The expected code compares once at i8 width (cmhi.16b) and
; sign-extends that mask to both 4s and 2d lanes.
332define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, ptr %ptr) {
333; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use:
334; CHECK:       ; %bb.0: ; %entry
335; CHECK-NEXT:    movi.16b v16, #10
336; CHECK-NEXT:    ushll.8h v19, v0, #0
337; CHECK-NEXT:    ldr q21, [sp]
338; CHECK-NEXT:    ushll.4s v24, v19, #0
339; CHECK-NEXT:    ushll2.4s v19, v19, #0
340; CHECK-NEXT:    cmhi.16b v16, v0, v16
341; CHECK-NEXT:    ushll2.8h v0, v0, #0
342; CHECK-NEXT:    sshll2.8h v17, v16, #0
343; CHECK-NEXT:    sshll.8h v16, v16, #0
344; CHECK-NEXT:    ushll.4s v25, v0, #0
345; CHECK-NEXT:    ushll2.4s v0, v0, #0
346; CHECK-NEXT:    sshll2.4s v18, v17, #0
347; CHECK-NEXT:    sshll.4s v17, v17, #0
348; CHECK-NEXT:    sshll2.4s v22, v16, #0
349; CHECK-NEXT:    sshll.4s v16, v16, #0
350; CHECK-NEXT:    sshll2.2d v20, v18, #0
351; CHECK-NEXT:    sshll.2d v23, v18, #0
352; CHECK-NEXT:    sshll2.2d v26, v17, #0
353; CHECK-NEXT:    sshll.2d v27, v17, #0
354; CHECK-NEXT:    and.16b v20, v21, v20
355; CHECK-NEXT:    sshll2.2d v21, v22, #0
356; CHECK-NEXT:    and.16b v7, v7, v23
357; CHECK-NEXT:    sshll.2d v23, v22, #0
358; CHECK-NEXT:    and.16b v6, v6, v26
359; CHECK-NEXT:    sshll2.2d v26, v16, #0
360; CHECK-NEXT:    and.16b v5, v5, v27
361; CHECK-NEXT:    stp q7, q20, [x0, #96]
362; CHECK-NEXT:    sshll.2d v20, v16, #0
363; CHECK-NEXT:    and.16b v21, v4, v21
364; CHECK-NEXT:    and.16b v4, v0, v18
365; CHECK-NEXT:    and.16b v7, v3, v23
366; CHECK-NEXT:    and.16b v3, v19, v22
367; CHECK-NEXT:    stp q5, q6, [x0, #64]
368; CHECK-NEXT:    and.16b v0, v24, v16
369; CHECK-NEXT:    and.16b v6, v2, v26
370; CHECK-NEXT:    and.16b v2, v25, v17
371; CHECK-NEXT:    and.16b v5, v1, v20
372; CHECK-NEXT:    mov.16b v1, v3
373; CHECK-NEXT:    mov.16b v3, v4
374; CHECK-NEXT:    stp q7, q21, [x0, #32]
375; CHECK-NEXT:    stp q5, q6, [x0]
376; CHECK-NEXT:    ret
377entry:
378  %ext = zext <16 x i8> %a to <16 x i32>
379  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
380  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
381  %sel.2 = select <16 x i1> %cmp, <16 x i64> %v, <16 x i64> zeroinitializer
382  store <16 x i64> %sel.2, ptr %ptr
383  ret <16 x i32> %sel
384}
385
; Signed counterpart of the zext/unsigned tests: sext <16 x i8> ->
; <16 x i32> feeds the select; compare is sgt splat(10) on %a. The expected
; code compares the sign-extended 4s lanes directly (cmgt.4s).
386define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) {
387; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32:
388; CHECK:       ; %bb.0: ; %entry
389; CHECK-NEXT:    sshll.8h v2, v0, #0
390; CHECK-NEXT:    sshll2.8h v0, v0, #0
391; CHECK-NEXT:    movi.4s v1, #10
392; CHECK-NEXT:    sshll.4s v3, v2, #0
393; CHECK-NEXT:    sshll2.4s v4, v0, #0
394; CHECK-NEXT:    sshll2.4s v2, v2, #0
395; CHECK-NEXT:    sshll.4s v5, v0, #0
396; CHECK-NEXT:    cmgt.4s v0, v3, v1
397; CHECK-NEXT:    cmgt.4s v7, v2, v1
398; CHECK-NEXT:    cmgt.4s v6, v5, v1
399; CHECK-NEXT:    cmgt.4s v1, v4, v1
400; CHECK-NEXT:    and.16b v0, v3, v0
401; CHECK-NEXT:    and.16b v3, v4, v1
402; CHECK-NEXT:    and.16b v1, v2, v7
403; CHECK-NEXT:    and.16b v2, v5, v6
404; CHECK-NEXT:    ret
405entry:
406  %ext = sext <16 x i8> %a to <16 x i32>
407  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
408  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
409  ret <16 x i32> %sel
410}
411
; sext <8 x i16> -> <8 x i32> feeds the select; compare is eq splat(10) on
; %a. The expected code compares the sign-extended 4s lanes (cmeq.4s).
412define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
413; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32:
414; CHECK:       ; %bb.0:
415; CHECK-NEXT:    movi.4s v1, #10
416; CHECK-NEXT:    sshll2.4s v2, v0, #0
417; CHECK-NEXT:    sshll.4s v0, v0, #0
418; CHECK-NEXT:    cmeq.4s v3, v0, v1
419; CHECK-NEXT:    cmeq.4s v1, v2, v1
420; CHECK-NEXT:    and.16b v1, v2, v1
421; CHECK-NEXT:    and.16b v0, v0, v3
422; CHECK-NEXT:    ret
423  %ext = sext <8 x i16> %a to <8 x i32>
424  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
425  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
426  ret <8 x i32> %sel
427}
428
; sext from the non-power-of-two element type i13 feeds the select; compare
; is eq splat(10). The expected code widens to 4s, sign-extends in-register
; from bit 12 with shl/sshr #19, and compares at 4s (cmeq.4s).
429define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
430; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
431; CHECK:       ; %bb.0:
432; CHECK-NEXT:    ushll.4s v2, v0, #0
433; CHECK-NEXT:    ushll2.4s v0, v0, #0
434; CHECK-NEXT:    movi.4s v1, #10
435; CHECK-NEXT:    shl.4s v0, v0, #19
436; CHECK-NEXT:    shl.4s v2, v2, #19
437; CHECK-NEXT:    sshr.4s v0, v0, #19
438; CHECK-NEXT:    sshr.4s v2, v2, #19
439; CHECK-NEXT:    cmeq.4s v3, v2, v1
440; CHECK-NEXT:    cmeq.4s v1, v0, v1
441; CHECK-NEXT:    and.16b v1, v0, v1
442; CHECK-NEXT:    and.16b v0, v2, v3
443; CHECK-NEXT:    ret
444  %ext = sext <8 x i13> %a to <8 x i32>
445  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
446  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
447  ret <8 x i32> %sel
448}
449
; sext <16 x i8> -> <16 x i32> feeds the select; compare is ne splat(10).
; The expected code computes cmeq.4s on the sign-extended lanes and applies
; the inverted mask with bic.16b.
; NOTE(review): the name ends in "v8i32" although the vectors here have 16
; elements; the name is referenced by CHECK-LABEL, so it is left as is.
450define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
451; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32:
452; CHECK:       ; %bb.0:
453; CHECK-NEXT:    sshll.8h v2, v0, #0
454; CHECK-NEXT:    sshll2.8h v0, v0, #0
455; CHECK-NEXT:    movi.4s v1, #10
456; CHECK-NEXT:    sshll.4s v3, v2, #0
457; CHECK-NEXT:    sshll2.4s v4, v0, #0
458; CHECK-NEXT:    sshll2.4s v2, v2, #0
459; CHECK-NEXT:    sshll.4s v5, v0, #0
460; CHECK-NEXT:    cmeq.4s v0, v3, v1
461; CHECK-NEXT:    cmeq.4s v7, v2, v1
462; CHECK-NEXT:    cmeq.4s v6, v5, v1
463; CHECK-NEXT:    cmeq.4s v1, v4, v1
464; CHECK-NEXT:    bic.16b v0, v3, v0
465; CHECK-NEXT:    bic.16b v3, v4, v1
466; CHECK-NEXT:    bic.16b v1, v2, v7
467; CHECK-NEXT:    bic.16b v2, v5, v6
468; CHECK-NEXT:    ret
469  %ext = sext <16 x i8> %a to <16 x i32>
470  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
471  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
472  ret <16 x i32> %sel
473}
474
; sext <8 x i16> -> <8 x i32> feeds the select; compare is sgt splat(10) on
; %a. The expected code compares the sign-extended 4s lanes (cmgt.4s).
475define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) {
476; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32:
477; CHECK:       ; %bb.0: ; %entry
478; CHECK-NEXT:    movi.4s v1, #10
479; CHECK-NEXT:    sshll2.4s v2, v0, #0
480; CHECK-NEXT:    sshll.4s v0, v0, #0
481; CHECK-NEXT:    cmgt.4s v3, v0, v1
482; CHECK-NEXT:    cmgt.4s v1, v2, v1
483; CHECK-NEXT:    and.16b v1, v2, v1
484; CHECK-NEXT:    and.16b v0, v0, v3
485; CHECK-NEXT:    ret
486entry:
487  %ext = sext <8 x i16> %a to <8 x i32>
488  %cmp = icmp sgt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
489  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
490  ret <8 x i32> %sel
491}
492
; sext from the non-power-of-two element type i15 feeds the select. The
; expected code widens to 4s, sign-extends in-register from bit 14 with
; shl/sshr #17, and compares at 4s (cmge.4s).
; NOTE(review): the name says "unsigned_pred" but the IR below uses the
; signed predicate sge; the name is referenced by CHECK-LABEL, so it is left
; as is.
493define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
494; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
495; CHECK:       ; %bb.0:
496; CHECK-NEXT:    ushll.4s v2, v0, #0
497; CHECK-NEXT:    ushll2.4s v0, v0, #0
498; CHECK-NEXT:    movi.4s v1, #10
499; CHECK-NEXT:    shl.4s v0, v0, #17
500; CHECK-NEXT:    shl.4s v2, v2, #17
501; CHECK-NEXT:    sshr.4s v0, v0, #17
502; CHECK-NEXT:    sshr.4s v2, v2, #17
503; CHECK-NEXT:    cmge.4s v3, v2, v1
504; CHECK-NEXT:    cmge.4s v1, v0, v1
505; CHECK-NEXT:    and.16b v1, v0, v1
506; CHECK-NEXT:    and.16b v0, v2, v3
507; CHECK-NEXT:    ret
508  %ext = sext <8 x i15> %a to <8 x i32>
509  %cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
510  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
511  ret <8 x i32> %sel
512}
513
; Mismatched extension and predicate: sext(%a) feeds the select while the
; compare on %a is unsigned (ugt splat(10)). The expected code compares at
; i8 width (cmhi.16b), applies the mask to the 16-bit sign-extended data
; (and.8b), and only then widens the result to 4s with sshll.
514define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
515; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select:
516; CHECK:       ; %bb.0: ; %entry
517; CHECK-NEXT:    movi.16b v1, #10
518; CHECK-NEXT:    sshll.8h v2, v0, #0
519; CHECK-NEXT:    ext.16b v4, v2, v2, #8
520; CHECK-NEXT:    cmhi.16b v1, v0, v1
521; CHECK-NEXT:    sshll2.8h v0, v0, #0
522; CHECK-NEXT:    sshll.8h v3, v1, #0
523; CHECK-NEXT:    sshll2.8h v1, v1, #0
524; CHECK-NEXT:    ext.16b v5, v0, v0, #8
525; CHECK-NEXT:    ext.16b v6, v3, v3, #8
526; CHECK-NEXT:    ext.16b v7, v1, v1, #8
527; CHECK-NEXT:    and.8b v2, v2, v3
528; CHECK-NEXT:    and.8b v1, v0, v1
529; CHECK-NEXT:    sshll.4s v0, v2, #0
530; CHECK-NEXT:    and.8b v3, v5, v7
531; CHECK-NEXT:    and.8b v4, v4, v6
532; CHECK-NEXT:    sshll.4s v2, v1, #0
533; CHECK-NEXT:    sshll.4s v3, v3, #0
534; CHECK-NEXT:    sshll.4s v1, v4, #0
535; CHECK-NEXT:    ret
536entry:
537  %ext = sext <16 x i8> %a to <16 x i32>
538  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
539  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
540  ret <16 x i32> %sel
541}
542
; zext(%a) feeds the select; the compare is the signed test
; "sgt splat(-1)", i.e. %a is non-negative. The expected code at this
; revision still compares at i8 width (cmge.16b against #0) and
; sign-extends the mask.
543define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
544; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
545; CHECK:       ; %bb.0: ; %entry
546; CHECK-NEXT:    cmge.16b v1, v0, #0
547; CHECK-NEXT:    ushll.8h v2, v0, #0
548; CHECK-NEXT:    ushll2.8h v0, v0, #0
549; CHECK-NEXT:    sshll.8h v3, v1, #0
550; CHECK-NEXT:    sshll2.8h v1, v1, #0
551; CHECK-NEXT:    ushll.4s v4, v2, #0
552; CHECK-NEXT:    ushll.4s v5, v0, #0
553; CHECK-NEXT:    ushll2.4s v2, v2, #0
554; CHECK-NEXT:    ushll2.4s v6, v0, #0
555; CHECK-NEXT:    sshll.4s v0, v3, #0
556; CHECK-NEXT:    sshll.4s v7, v1, #0
557; CHECK-NEXT:    sshll2.4s v16, v3, #0
558; CHECK-NEXT:    sshll2.4s v1, v1, #0
559; CHECK-NEXT:    and.16b v0, v4, v0
560; CHECK-NEXT:    and.16b v3, v6, v1
561; CHECK-NEXT:    and.16b v1, v2, v16
562; CHECK-NEXT:    and.16b v2, v5, v7
563; CHECK-NEXT:    ret
564entry:
565  %ext = zext <16 x i8> %a to <16 x i32>
566  %cmp = icmp sgt <16 x i8> %a,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
567  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
568  ret <16 x i32> %sel
569}
570
; Loop version: each iteration loads <16 x i8> from %src + %iv, zero-extends
; it to <16 x i32>, selects it against zero on "sgt splat(-1)", and stores
; the result to %dst. %iv advances by 16 and the loop exits when it reaches
; 128. The expected code performs the widening with four tbl.16b lookups
; whose index vectors are loaded from the constant pool before the loop, and
; compares at i8 width (cmge.16b against #0).
571define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
572; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
573; CHECK:       ; %bb.0: ; %entry
574; CHECK-NEXT:  Lloh2:
575; CHECK-NEXT:    adrp x8, lCPI24_0@PAGE
576; CHECK-NEXT:  Lloh3:
577; CHECK-NEXT:    adrp x9, lCPI24_1@PAGE
578; CHECK-NEXT:  Lloh4:
579; CHECK-NEXT:    adrp x10, lCPI24_2@PAGE
580; CHECK-NEXT:  Lloh5:
581; CHECK-NEXT:    ldr q0, [x8, lCPI24_0@PAGEOFF]
582; CHECK-NEXT:  Lloh6:
583; CHECK-NEXT:    adrp x8, lCPI24_3@PAGE
584; CHECK-NEXT:  Lloh7:
585; CHECK-NEXT:    ldr q1, [x9, lCPI24_1@PAGEOFF]
586; CHECK-NEXT:  Lloh8:
587; CHECK-NEXT:    ldr q2, [x10, lCPI24_2@PAGEOFF]
588; CHECK-NEXT:  Lloh9:
589; CHECK-NEXT:    ldr q3, [x8, lCPI24_3@PAGEOFF]
590; CHECK-NEXT:    mov x8, xzr
591; CHECK-NEXT:  LBB24_1: ; %loop
592; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
593; CHECK-NEXT:    ldr q4, [x0, x8]
594; CHECK-NEXT:    add x8, x8, #16
595; CHECK-NEXT:    cmp x8, #128
596; CHECK-NEXT:    cmge.16b v5, v4, #0
597; CHECK-NEXT:    tbl.16b v7, { v4 }, v0
598; CHECK-NEXT:    tbl.16b v16, { v4 }, v1
599; CHECK-NEXT:    tbl.16b v18, { v4 }, v2
600; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
601; CHECK-NEXT:    sshll2.8h v6, v5, #0
602; CHECK-NEXT:    sshll.8h v5, v5, #0
603; CHECK-NEXT:    sshll2.4s v17, v6, #0
604; CHECK-NEXT:    sshll.4s v6, v6, #0
605; CHECK-NEXT:    sshll2.4s v19, v5, #0
606; CHECK-NEXT:    sshll.4s v5, v5, #0
607; CHECK-NEXT:    and.16b v7, v7, v17
608; CHECK-NEXT:    and.16b v6, v16, v6
609; CHECK-NEXT:    and.16b v16, v18, v19
610; CHECK-NEXT:    and.16b v4, v4, v5
611; CHECK-NEXT:    stp q6, q7, [x1, #32]
612; CHECK-NEXT:    stp q4, q16, [x1], #64
613; CHECK-NEXT:    b.ne LBB24_1
614; CHECK-NEXT:  ; %bb.2: ; %exit
615; CHECK-NEXT:    ret
616; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh9
617; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh8
618; CHECK-NEXT:    .loh AdrpLdr Lloh3, Lloh7
619; CHECK-NEXT:    .loh AdrpAdrp Lloh2, Lloh6
620; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh5
621entry:
622  br label %loop
623
624loop:
625  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
626  %src.gep = getelementptr i8, ptr %src, i64 %iv
627  %load = load <16 x i8>, ptr %src.gep
628  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
629  %ext = zext <16 x i8> %load to <16 x i32>
630  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
631  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
632  store <16 x i32> %sel, ptr %dst.gep
633  %iv.next = add nuw i64 %iv, 16
634  %ec = icmp eq i64 %iv.next, 128
635  br i1 %ec, label %exit, label %loop
636
637exit:
638  ret void
639}
640
; Same loop shape as the zext-in-loop test, but the wide value is built with
; a shufflevector + bitcast instead of a zext: every 4-byte group of the
; <64 x i8> result is three bytes taken from the zero vector (mask index 16)
; followed by one byte of %load (indices 0..15 in order). The expected code
; uses four tbl.16b lookups with constant-pool index vectors and compares at
; i8 width (cmge.16b against #0).
641define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
642; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
643; CHECK:       ; %bb.0: ; %entry
644; CHECK-NEXT:  Lloh10:
645; CHECK-NEXT:    adrp x8, lCPI25_0@PAGE
646; CHECK-NEXT:  Lloh11:
647; CHECK-NEXT:    adrp x9, lCPI25_1@PAGE
648; CHECK-NEXT:  Lloh12:
649; CHECK-NEXT:    adrp x10, lCPI25_2@PAGE
650; CHECK-NEXT:  Lloh13:
651; CHECK-NEXT:    ldr q0, [x8, lCPI25_0@PAGEOFF]
652; CHECK-NEXT:  Lloh14:
653; CHECK-NEXT:    adrp x8, lCPI25_3@PAGE
654; CHECK-NEXT:  Lloh15:
655; CHECK-NEXT:    ldr q1, [x9, lCPI25_1@PAGEOFF]
656; CHECK-NEXT:  Lloh16:
657; CHECK-NEXT:    ldr q2, [x10, lCPI25_2@PAGEOFF]
658; CHECK-NEXT:  Lloh17:
659; CHECK-NEXT:    ldr q3, [x8, lCPI25_3@PAGEOFF]
660; CHECK-NEXT:    mov x8, xzr
661; CHECK-NEXT:  LBB25_1: ; %loop
662; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
663; CHECK-NEXT:    ldr q4, [x0, x8]
664; CHECK-NEXT:    add x8, x8, #16
665; CHECK-NEXT:    cmp x8, #128
666; CHECK-NEXT:    cmge.16b v5, v4, #0
667; CHECK-NEXT:    tbl.16b v7, { v4 }, v0
668; CHECK-NEXT:    tbl.16b v16, { v4 }, v1
669; CHECK-NEXT:    tbl.16b v18, { v4 }, v2
670; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
671; CHECK-NEXT:    sshll2.8h v6, v5, #0
672; CHECK-NEXT:    sshll.8h v5, v5, #0
673; CHECK-NEXT:    sshll2.4s v17, v6, #0
674; CHECK-NEXT:    sshll.4s v6, v6, #0
675; CHECK-NEXT:    sshll2.4s v19, v5, #0
676; CHECK-NEXT:    sshll.4s v5, v5, #0
677; CHECK-NEXT:    and.16b v7, v7, v17
678; CHECK-NEXT:    and.16b v6, v16, v6
679; CHECK-NEXT:    and.16b v16, v18, v19
680; CHECK-NEXT:    and.16b v4, v4, v5
681; CHECK-NEXT:    stp q6, q7, [x1, #32]
682; CHECK-NEXT:    stp q4, q16, [x1], #64
683; CHECK-NEXT:    b.ne LBB25_1
684; CHECK-NEXT:  ; %bb.2: ; %exit
685; CHECK-NEXT:    ret
686; CHECK-NEXT:    .loh AdrpLdr Lloh14, Lloh17
687; CHECK-NEXT:    .loh AdrpLdr Lloh12, Lloh16
688; CHECK-NEXT:    .loh AdrpLdr Lloh11, Lloh15
689; CHECK-NEXT:    .loh AdrpAdrp Lloh10, Lloh14
690; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh13
691entry:
692  br label %loop
693
694loop:
695  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
696  %src.gep = getelementptr i8, ptr %src, i64 %iv
697  %load = load <16 x i8>, ptr %src.gep
698  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
699  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
700  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
701  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
702  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
703  store <16 x i32> %sel, ptr %dst.gep
704  %iv.next = add nuw i64 %iv, 16
705  %ec = icmp eq i64 %iv.next, 128
706  br i1 %ec, label %exit, label %loop
707
708exit:
709  ret void
710}
711
; Same loop shape, with the wide value built by shufflevector + bitcast. The
; mask has the "three zero bytes, then one byte of %load" layout except for
; its very first element, which is 1 (a byte of %load) instead of 16 (a zero
; byte), so the first 4-byte group holds two non-zero source bytes. The
; expected code uses four tbl.16b lookups with constant-pool index vectors
; and compares at i8 width (cmge.16b against #0).
712define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
713; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
714; CHECK:       ; %bb.0: ; %entry
715; CHECK-NEXT:  Lloh18:
716; CHECK-NEXT:    adrp x8, lCPI26_0@PAGE
717; CHECK-NEXT:  Lloh19:
718; CHECK-NEXT:    adrp x9, lCPI26_1@PAGE
719; CHECK-NEXT:  Lloh20:
720; CHECK-NEXT:    adrp x10, lCPI26_2@PAGE
721; CHECK-NEXT:  Lloh21:
722; CHECK-NEXT:    ldr q0, [x8, lCPI26_0@PAGEOFF]
723; CHECK-NEXT:  Lloh22:
724; CHECK-NEXT:    adrp x8, lCPI26_3@PAGE
725; CHECK-NEXT:  Lloh23:
726; CHECK-NEXT:    ldr q1, [x9, lCPI26_1@PAGEOFF]
727; CHECK-NEXT:  Lloh24:
728; CHECK-NEXT:    ldr q2, [x10, lCPI26_2@PAGEOFF]
729; CHECK-NEXT:  Lloh25:
730; CHECK-NEXT:    ldr q3, [x8, lCPI26_3@PAGEOFF]
731; CHECK-NEXT:    mov x8, xzr
732; CHECK-NEXT:  LBB26_1: ; %loop
733; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
734; CHECK-NEXT:    ldr q4, [x0, x8]
735; CHECK-NEXT:    add x8, x8, #16
736; CHECK-NEXT:    cmp x8, #128
737; CHECK-NEXT:    cmge.16b v5, v4, #0
738; CHECK-NEXT:    tbl.16b v7, { v4 }, v0
739; CHECK-NEXT:    tbl.16b v16, { v4 }, v1
740; CHECK-NEXT:    tbl.16b v18, { v4 }, v2
741; CHECK-NEXT:    tbl.16b v4, { v4 }, v3
742; CHECK-NEXT:    sshll2.8h v6, v5, #0
743; CHECK-NEXT:    sshll.8h v5, v5, #0
744; CHECK-NEXT:    sshll2.4s v17, v6, #0
745; CHECK-NEXT:    sshll.4s v6, v6, #0
746; CHECK-NEXT:    sshll2.4s v19, v5, #0
747; CHECK-NEXT:    sshll.4s v5, v5, #0
748; CHECK-NEXT:    and.16b v7, v7, v17
749; CHECK-NEXT:    and.16b v6, v16, v6
750; CHECK-NEXT:    and.16b v16, v18, v19
751; CHECK-NEXT:    and.16b v4, v4, v5
752; CHECK-NEXT:    stp q6, q7, [x1, #32]
753; CHECK-NEXT:    stp q4, q16, [x1], #64
754; CHECK-NEXT:    b.ne LBB26_1
755; CHECK-NEXT:  ; %bb.2: ; %exit
756; CHECK-NEXT:    ret
757; CHECK-NEXT:    .loh AdrpLdr Lloh22, Lloh25
758; CHECK-NEXT:    .loh AdrpLdr Lloh20, Lloh24
759; CHECK-NEXT:    .loh AdrpLdr Lloh19, Lloh23
760; CHECK-NEXT:    .loh AdrpAdrp Lloh18, Lloh22
761; CHECK-NEXT:    .loh AdrpLdr Lloh18, Lloh21
762entry:
763  br label %loop
764
765loop:
766  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
767  %src.gep = getelementptr i8, ptr %src, i64 %iv
768  %load = load <16 x i8>, ptr %src.gep
769  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
770  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 1, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
771  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
772  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
773  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
774  store <16 x i32> %sel, ptr %dst.gep
775  %iv.next = add nuw i64 %iv, 16
776  %ec = icmp eq i64 %iv.next, 128
777  br i1 %ec, label %exit, label %loop
778
779exit:
780  ret void
781}
782