xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-masked-load.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE
4
; Masked load of <4 x i32> with alignment 4 and a zero passthru. The lane mask
; is the signed test %a > 0. Both runs expect a single VPT-predicated vldrw;
; the big-endian run additionally expects a vrev64.32 on the incoming argument
; and on the returned vector.
5define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(ptr %dest, <4 x i32> %a) {
6; CHECK-LE-LABEL: masked_v4i32_align4_zero:
7; CHECK-LE:       @ %bb.0: @ %entry
8; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
9; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
10; CHECK-LE-NEXT:    bx lr
11;
12; CHECK-BE-LABEL: masked_v4i32_align4_zero:
13; CHECK-BE:       @ %bb.0: @ %entry
14; CHECK-BE-NEXT:    vrev64.32 q1, q0
15; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
16; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
17; CHECK-BE-NEXT:    vrev64.32 q0, q1
18; CHECK-BE-NEXT:    bx lr
19entry:
20  %c = icmp sgt <4 x i32> %a, zeroinitializer
21  %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> zeroinitializer)
22  ret <4 x i32> %l
23}
24
; Same as the alignment-4 zero-passthru case, but the passthru operand is
; undef. The expected instructions are identical: one VPT-predicated vldrw,
; wrapped in vrev64.32 for the big-endian run.
25define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_undef(ptr %dest, <4 x i32> %a) {
26; CHECK-LE-LABEL: masked_v4i32_align4_undef:
27; CHECK-LE:       @ %bb.0: @ %entry
28; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
29; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
30; CHECK-LE-NEXT:    bx lr
31;
32; CHECK-BE-LABEL: masked_v4i32_align4_undef:
33; CHECK-BE:       @ %bb.0: @ %entry
34; CHECK-BE-NEXT:    vrev64.32 q1, q0
35; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
36; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
37; CHECK-BE-NEXT:    vrev64.32 q0, q1
38; CHECK-BE-NEXT:    bx lr
39entry:
40  %c = icmp sgt <4 x i32> %a, zeroinitializer
41  %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> undef)
42  ret <4 x i32> %l
43}
44
; Masked load of <4 x i32> with alignment 1 and an undef passthru. Here the
; expected output is not a predicated vector load but a scalarised sequence:
; the compare result is read with "vmrs r2, p0", one bit per lane is taken from
; predicate bit positions 0, 4, 8 and 12 and packed into the low four bits of
; r1, and each lane is then filled by a conditional ldr + vmov.32 pair.
; The big-endian run packs the predicate bits in the opposite order (12, 8, 4,
; 0) and tests them with the mirrored shift amounts.
45define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) {
46; CHECK-LE-LABEL: masked_v4i32_align1_undef:
47; CHECK-LE:       @ %bb.0: @ %entry
48; CHECK-LE-NEXT:    .pad #4
49; CHECK-LE-NEXT:    sub sp, #4
50; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
51; CHECK-LE-NEXT:    @ implicit-def: $q0
52; CHECK-LE-NEXT:    vmrs r2, p0
53; CHECK-LE-NEXT:    and r1, r2, #1
54; CHECK-LE-NEXT:    rsbs r3, r1, #0
55; CHECK-LE-NEXT:    movs r1, #0
56; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
57; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
58; CHECK-LE-NEXT:    rsbs r3, r3, #0
59; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
60; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
61; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
62; CHECK-LE-NEXT:    rsbs r3, r3, #0
63; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
64; CHECK-LE-NEXT:    rsbs r2, r2, #0
65; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
66; CHECK-LE-NEXT:    lsls r2, r1, #31
67; CHECK-LE-NEXT:    itt ne
68; CHECK-LE-NEXT:    ldrne r2, [r0]
69; CHECK-LE-NEXT:    vmovne.32 q0[0], r2
70; CHECK-LE-NEXT:    lsls r2, r1, #30
71; CHECK-LE-NEXT:    itt mi
72; CHECK-LE-NEXT:    ldrmi r2, [r0, #4]
73; CHECK-LE-NEXT:    vmovmi.32 q0[1], r2
74; CHECK-LE-NEXT:    lsls r2, r1, #29
75; CHECK-LE-NEXT:    itt mi
76; CHECK-LE-NEXT:    ldrmi r2, [r0, #8]
77; CHECK-LE-NEXT:    vmovmi.32 q0[2], r2
78; CHECK-LE-NEXT:    lsls r1, r1, #28
79; CHECK-LE-NEXT:    itt mi
80; CHECK-LE-NEXT:    ldrmi r0, [r0, #12]
81; CHECK-LE-NEXT:    vmovmi.32 q0[3], r0
82; CHECK-LE-NEXT:    add sp, #4
83; CHECK-LE-NEXT:    bx lr
84;
85; CHECK-BE-LABEL: masked_v4i32_align1_undef:
86; CHECK-BE:       @ %bb.0: @ %entry
87; CHECK-BE-NEXT:    .pad #4
88; CHECK-BE-NEXT:    sub sp, #4
89; CHECK-BE-NEXT:    vrev64.32 q1, q0
90; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
91; CHECK-BE-NEXT:    @ implicit-def: $q1
92; CHECK-BE-NEXT:    vmrs r2, p0
93; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
94; CHECK-BE-NEXT:    rsbs r3, r1, #0
95; CHECK-BE-NEXT:    movs r1, #0
96; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
97; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
98; CHECK-BE-NEXT:    rsbs r3, r3, #0
99; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
100; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
101; CHECK-BE-NEXT:    and r2, r2, #1
102; CHECK-BE-NEXT:    rsbs r3, r3, #0
103; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
104; CHECK-BE-NEXT:    rsbs r2, r2, #0
105; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
106; CHECK-BE-NEXT:    lsls r2, r1, #28
107; CHECK-BE-NEXT:    itt mi
108; CHECK-BE-NEXT:    ldrmi r2, [r0]
109; CHECK-BE-NEXT:    vmovmi.32 q1[0], r2
110; CHECK-BE-NEXT:    lsls r2, r1, #29
111; CHECK-BE-NEXT:    itt mi
112; CHECK-BE-NEXT:    ldrmi r2, [r0, #4]
113; CHECK-BE-NEXT:    vmovmi.32 q1[1], r2
114; CHECK-BE-NEXT:    lsls r2, r1, #30
115; CHECK-BE-NEXT:    itt mi
116; CHECK-BE-NEXT:    ldrmi r2, [r0, #8]
117; CHECK-BE-NEXT:    vmovmi.32 q1[2], r2
118; CHECK-BE-NEXT:    lsls r1, r1, #31
119; CHECK-BE-NEXT:    itt ne
120; CHECK-BE-NEXT:    ldrne r0, [r0, #12]
121; CHECK-BE-NEXT:    vmovne.32 q1[3], r0
122; CHECK-BE-NEXT:    vrev64.32 q0, q1
123; CHECK-BE-NEXT:    add sp, #4
124; CHECK-BE-NEXT:    bx lr
125entry:
126  %c = icmp sgt <4 x i32> %a, zeroinitializer
127  %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i32> undef)
128  ret <4 x i32> %l
129}
130
; Masked load of <4 x i32> with alignment 4 where the passthru is the live
; value %a (the same vector that feeds the compare). The predicated vldrw is
; expected to load into a second register, followed by a vpsel that merges the
; loaded lanes with %a.
131define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_other(ptr %dest, <4 x i32> %a) {
132; CHECK-LE-LABEL: masked_v4i32_align4_other:
133; CHECK-LE:       @ %bb.0: @ %entry
134; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
135; CHECK-LE-NEXT:    vldrwt.u32 q1, [r0]
136; CHECK-LE-NEXT:    vpsel q0, q1, q0
137; CHECK-LE-NEXT:    bx lr
138;
139; CHECK-BE-LABEL: masked_v4i32_align4_other:
140; CHECK-BE:       @ %bb.0: @ %entry
141; CHECK-BE-NEXT:    vrev64.32 q1, q0
142; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
143; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
144; CHECK-BE-NEXT:    vpsel q1, q0, q1
145; CHECK-BE-NEXT:    vrev64.32 q0, q1
146; CHECK-BE-NEXT:    bx lr
147entry:
148  %c = icmp sgt <4 x i32> %a, zeroinitializer
149  %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> %a)
150  ret <4 x i32> %l
151}
152
; Masked load of <4 x i16> (alignment 2, zero passthru) whose result is
; zero-extended to <4 x i32>. The load and the extension are expected to be
; emitted as one predicated widening load, vldrht.u32.
153define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_zero(ptr %dest, <4 x i32> %a) {
154; CHECK-LE-LABEL: zext16_masked_v4i32_align2_zero:
155; CHECK-LE:       @ %bb.0: @ %entry
156; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
157; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
158; CHECK-LE-NEXT:    bx lr
159;
160; CHECK-BE-LABEL: zext16_masked_v4i32_align2_zero:
161; CHECK-BE:       @ %bb.0: @ %entry
162; CHECK-BE-NEXT:    vrev64.32 q1, q0
163; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
164; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
165; CHECK-BE-NEXT:    vrev64.32 q0, q1
166; CHECK-BE-NEXT:    bx lr
167entry:
168  %c = icmp sgt <4 x i32> %a, zeroinitializer
169  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
170  %ext = zext <4 x i16> %l to <4 x i32>
171  ret <4 x i32> %ext
172}
173
; Zero-extending masked load of <4 x i16> with alignment 2 and an undef
; passthru. The expected output matches the zero-passthru variant: a single
; predicated vldrht.u32.
174define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_undef(ptr %dest, <4 x i32> %a) {
175; CHECK-LE-LABEL: zext16_masked_v4i32_align2_undef:
176; CHECK-LE:       @ %bb.0: @ %entry
177; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
178; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
179; CHECK-LE-NEXT:    bx lr
180;
181; CHECK-BE-LABEL: zext16_masked_v4i32_align2_undef:
182; CHECK-BE:       @ %bb.0: @ %entry
183; CHECK-BE-NEXT:    vrev64.32 q1, q0
184; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
185; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
186; CHECK-BE-NEXT:    vrev64.32 q0, q1
187; CHECK-BE-NEXT:    bx lr
188entry:
189  %c = icmp sgt <4 x i32> %a, zeroinitializer
190  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> undef)
191  %ext = zext <4 x i16> %l to <4 x i32>
192  ret <4 x i32> %ext
193}
194
; Zero-extending masked load of <4 x i16> with alignment 1 and an undef
; passthru. With this alignment the expected output is not a predicated vldrh
; but the scalarised form: the predicate is read with "vmrs r2, p0", packed to
; one bit per lane in r1, and each lane is filled by a conditional ldrh +
; vmov.32 pair. The widening then happens in-register and, because the IR uses
; zext, it must be the unsigned vmovlb.u16. This is the zext counterpart of
; sext16_masked_v4i32_align1_undef, which expects vmovlb.s16.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py whenever the IR below changes.
195define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) {
196; CHECK-LE-LABEL: zext16_masked_v4i32_align1_undef:
197; CHECK-LE:       @ %bb.0: @ %entry
198; CHECK-LE-NEXT:    .pad #4
199; CHECK-LE-NEXT:    sub sp, #4
200; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
201; CHECK-LE-NEXT:    @ implicit-def: $q0
202; CHECK-LE-NEXT:    vmrs r2, p0
203; CHECK-LE-NEXT:    and r1, r2, #1
204; CHECK-LE-NEXT:    rsbs r3, r1, #0
205; CHECK-LE-NEXT:    movs r1, #0
206; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
207; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
208; CHECK-LE-NEXT:    rsbs r3, r3, #0
209; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
210; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
211; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
212; CHECK-LE-NEXT:    rsbs r3, r3, #0
213; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
214; CHECK-LE-NEXT:    rsbs r2, r2, #0
215; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
216; CHECK-LE-NEXT:    lsls r2, r1, #31
217; CHECK-LE-NEXT:    itt ne
218; CHECK-LE-NEXT:    ldrhne r2, [r0]
219; CHECK-LE-NEXT:    vmovne.32 q0[0], r2
220; CHECK-LE-NEXT:    lsls r2, r1, #30
221; CHECK-LE-NEXT:    itt mi
222; CHECK-LE-NEXT:    ldrhmi r2, [r0, #2]
223; CHECK-LE-NEXT:    vmovmi.32 q0[1], r2
224; CHECK-LE-NEXT:    lsls r2, r1, #29
225; CHECK-LE-NEXT:    itt mi
226; CHECK-LE-NEXT:    ldrhmi r2, [r0, #4]
227; CHECK-LE-NEXT:    vmovmi.32 q0[2], r2
228; CHECK-LE-NEXT:    lsls r1, r1, #28
229; CHECK-LE-NEXT:    itt mi
230; CHECK-LE-NEXT:    ldrhmi r0, [r0, #6]
231; CHECK-LE-NEXT:    vmovmi.32 q0[3], r0
232; CHECK-LE-NEXT:    vmovlb.u16 q0, q0
233; CHECK-LE-NEXT:    add sp, #4
234; CHECK-LE-NEXT:    bx lr
235;
236; CHECK-BE-LABEL: zext16_masked_v4i32_align1_undef:
237; CHECK-BE:       @ %bb.0: @ %entry
238; CHECK-BE-NEXT:    .pad #4
239; CHECK-BE-NEXT:    sub sp, #4
240; CHECK-BE-NEXT:    vrev64.32 q1, q0
241; CHECK-BE-NEXT:    @ implicit-def: $q0
242; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
243; CHECK-BE-NEXT:    vmrs r2, p0
244; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
245; CHECK-BE-NEXT:    rsbs r3, r1, #0
246; CHECK-BE-NEXT:    movs r1, #0
247; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
248; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
249; CHECK-BE-NEXT:    rsbs r3, r3, #0
250; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
251; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
252; CHECK-BE-NEXT:    and r2, r2, #1
253; CHECK-BE-NEXT:    rsbs r3, r3, #0
254; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
255; CHECK-BE-NEXT:    rsbs r2, r2, #0
256; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
257; CHECK-BE-NEXT:    lsls r2, r1, #28
258; CHECK-BE-NEXT:    itt mi
259; CHECK-BE-NEXT:    ldrhmi r2, [r0]
260; CHECK-BE-NEXT:    vmovmi.32 q0[0], r2
261; CHECK-BE-NEXT:    lsls r2, r1, #29
262; CHECK-BE-NEXT:    itt mi
263; CHECK-BE-NEXT:    ldrhmi r2, [r0, #2]
264; CHECK-BE-NEXT:    vmovmi.32 q0[1], r2
265; CHECK-BE-NEXT:    lsls r2, r1, #30
266; CHECK-BE-NEXT:    itt mi
267; CHECK-BE-NEXT:    ldrhmi r2, [r0, #4]
268; CHECK-BE-NEXT:    vmovmi.32 q0[2], r2
269; CHECK-BE-NEXT:    lsls r1, r1, #31
270; CHECK-BE-NEXT:    itt ne
271; CHECK-BE-NEXT:    ldrhne r0, [r0, #6]
272; CHECK-BE-NEXT:    vmovne.32 q0[3], r0
273; CHECK-BE-NEXT:    vmovlb.u16 q1, q0
274; CHECK-BE-NEXT:    vrev64.32 q0, q1
275; CHECK-BE-NEXT:    add sp, #4
276; CHECK-BE-NEXT:    bx lr
277entry:
278  %c = icmp sgt <4 x i32> %a, zeroinitializer
279  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> undef)
280  %ext = zext <4 x i16> %l to <4 x i32>
281  ret <4 x i32> %ext
282}
283
; Zero-extending masked load of <4 x i16> (alignment 2) where %a is a
; <4 x i16> argument used both as the compare input and as the passthru.
; Two widenings of %a are expected: vmovlb.u16 for the passthru that feeds the
; final vpsel, and vmovlb.s16 for the signed compare. The load itself is the
; predicated widening vldrht.u32.
284define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_other(ptr %dest, <4 x i16> %a) {
285; CHECK-LE-LABEL: zext16_masked_v4i32_align2_other:
286; CHECK-LE:       @ %bb.0: @ %entry
287; CHECK-LE-NEXT:    vmovlb.u16 q1, q0
288; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
289; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
290; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
291; CHECK-LE-NEXT:    vpsel q0, q0, q1
292; CHECK-LE-NEXT:    bx lr
293;
294; CHECK-BE-LABEL: zext16_masked_v4i32_align2_other:
295; CHECK-BE:       @ %bb.0: @ %entry
296; CHECK-BE-NEXT:    vrev64.32 q1, q0
297; CHECK-BE-NEXT:    vmovlb.u16 q0, q1
298; CHECK-BE-NEXT:    vmovlb.s16 q1, q1
299; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
300; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
301; CHECK-BE-NEXT:    vpsel q1, q1, q0
302; CHECK-BE-NEXT:    vrev64.32 q0, q1
303; CHECK-BE-NEXT:    bx lr
304entry:
305  %c = icmp sgt <4 x i16> %a, zeroinitializer
306  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> %a)
307  %ext = zext <4 x i16> %l to <4 x i32>
308  ret <4 x i32> %ext
309}
310
; Masked load of <4 x i16> (alignment 2, zero passthru) whose result is
; sign-extended to <4 x i32>. The load and the extension are expected to be
; emitted as one predicated widening load, vldrht.s32.
311define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_zero(ptr %dest, <4 x i32> %a) {
312; CHECK-LE-LABEL: sext16_masked_v4i32_align2_zero:
313; CHECK-LE:       @ %bb.0: @ %entry
314; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
315; CHECK-LE-NEXT:    vldrht.s32 q0, [r0]
316; CHECK-LE-NEXT:    bx lr
317;
318; CHECK-BE-LABEL: sext16_masked_v4i32_align2_zero:
319; CHECK-BE:       @ %bb.0: @ %entry
320; CHECK-BE-NEXT:    vrev64.32 q1, q0
321; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
322; CHECK-BE-NEXT:    vldrht.s32 q1, [r0]
323; CHECK-BE-NEXT:    vrev64.32 q0, q1
324; CHECK-BE-NEXT:    bx lr
325entry:
326  %c = icmp sgt <4 x i32> %a, zeroinitializer
327  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
328  %sext = sext <4 x i16> %l to <4 x i32>
329  ret <4 x i32> %sext
330}
331
; Sign-extending masked load of <4 x i16> with alignment 2 and an undef
; passthru. The expected output matches the zero-passthru variant: a single
; predicated vldrht.s32.
332define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_undef(ptr %dest, <4 x i32> %a) {
333; CHECK-LE-LABEL: sext16_masked_v4i32_align2_undef:
334; CHECK-LE:       @ %bb.0: @ %entry
335; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
336; CHECK-LE-NEXT:    vldrht.s32 q0, [r0]
337; CHECK-LE-NEXT:    bx lr
338;
339; CHECK-BE-LABEL: sext16_masked_v4i32_align2_undef:
340; CHECK-BE:       @ %bb.0: @ %entry
341; CHECK-BE-NEXT:    vrev64.32 q1, q0
342; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
343; CHECK-BE-NEXT:    vldrht.s32 q1, [r0]
344; CHECK-BE-NEXT:    vrev64.32 q0, q1
345; CHECK-BE-NEXT:    bx lr
346entry:
347  %c = icmp sgt <4 x i32> %a, zeroinitializer
348  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> undef)
349  %sext = sext <4 x i16> %l to <4 x i32>
350  ret <4 x i32> %sext
351}
352
; Sign-extending masked load of <4 x i16> with alignment 1 and an undef
; passthru. The expected output is the scalarised form: the predicate is read
; with "vmrs r2, p0", packed to one bit per lane in r1, and each lane is filled
; by a conditional ldrh + vmov.32 pair at byte offsets 0, 2, 4 and 6. The sign
; extension is then done in-register with vmovlb.s16.
353define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) {
354; CHECK-LE-LABEL: sext16_masked_v4i32_align1_undef:
355; CHECK-LE:       @ %bb.0: @ %entry
356; CHECK-LE-NEXT:    .pad #4
357; CHECK-LE-NEXT:    sub sp, #4
358; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
359; CHECK-LE-NEXT:    @ implicit-def: $q0
360; CHECK-LE-NEXT:    vmrs r2, p0
361; CHECK-LE-NEXT:    and r1, r2, #1
362; CHECK-LE-NEXT:    rsbs r3, r1, #0
363; CHECK-LE-NEXT:    movs r1, #0
364; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
365; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
366; CHECK-LE-NEXT:    rsbs r3, r3, #0
367; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
368; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
369; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
370; CHECK-LE-NEXT:    rsbs r3, r3, #0
371; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
372; CHECK-LE-NEXT:    rsbs r2, r2, #0
373; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
374; CHECK-LE-NEXT:    lsls r2, r1, #31
375; CHECK-LE-NEXT:    itt ne
376; CHECK-LE-NEXT:    ldrhne r2, [r0]
377; CHECK-LE-NEXT:    vmovne.32 q0[0], r2
378; CHECK-LE-NEXT:    lsls r2, r1, #30
379; CHECK-LE-NEXT:    itt mi
380; CHECK-LE-NEXT:    ldrhmi r2, [r0, #2]
381; CHECK-LE-NEXT:    vmovmi.32 q0[1], r2
382; CHECK-LE-NEXT:    lsls r2, r1, #29
383; CHECK-LE-NEXT:    itt mi
384; CHECK-LE-NEXT:    ldrhmi r2, [r0, #4]
385; CHECK-LE-NEXT:    vmovmi.32 q0[2], r2
386; CHECK-LE-NEXT:    lsls r1, r1, #28
387; CHECK-LE-NEXT:    itt mi
388; CHECK-LE-NEXT:    ldrhmi r0, [r0, #6]
389; CHECK-LE-NEXT:    vmovmi.32 q0[3], r0
390; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
391; CHECK-LE-NEXT:    add sp, #4
392; CHECK-LE-NEXT:    bx lr
393;
394; CHECK-BE-LABEL: sext16_masked_v4i32_align1_undef:
395; CHECK-BE:       @ %bb.0: @ %entry
396; CHECK-BE-NEXT:    .pad #4
397; CHECK-BE-NEXT:    sub sp, #4
398; CHECK-BE-NEXT:    vrev64.32 q1, q0
399; CHECK-BE-NEXT:    @ implicit-def: $q0
400; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
401; CHECK-BE-NEXT:    vmrs r2, p0
402; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
403; CHECK-BE-NEXT:    rsbs r3, r1, #0
404; CHECK-BE-NEXT:    movs r1, #0
405; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
406; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
407; CHECK-BE-NEXT:    rsbs r3, r3, #0
408; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
409; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
410; CHECK-BE-NEXT:    and r2, r2, #1
411; CHECK-BE-NEXT:    rsbs r3, r3, #0
412; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
413; CHECK-BE-NEXT:    rsbs r2, r2, #0
414; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
415; CHECK-BE-NEXT:    lsls r2, r1, #28
416; CHECK-BE-NEXT:    itt mi
417; CHECK-BE-NEXT:    ldrhmi r2, [r0]
418; CHECK-BE-NEXT:    vmovmi.32 q0[0], r2
419; CHECK-BE-NEXT:    lsls r2, r1, #29
420; CHECK-BE-NEXT:    itt mi
421; CHECK-BE-NEXT:    ldrhmi r2, [r0, #2]
422; CHECK-BE-NEXT:    vmovmi.32 q0[1], r2
423; CHECK-BE-NEXT:    lsls r2, r1, #30
424; CHECK-BE-NEXT:    itt mi
425; CHECK-BE-NEXT:    ldrhmi r2, [r0, #4]
426; CHECK-BE-NEXT:    vmovmi.32 q0[2], r2
427; CHECK-BE-NEXT:    lsls r1, r1, #31
428; CHECK-BE-NEXT:    itt ne
429; CHECK-BE-NEXT:    ldrhne r0, [r0, #6]
430; CHECK-BE-NEXT:    vmovne.32 q0[3], r0
431; CHECK-BE-NEXT:    vmovlb.s16 q1, q0
432; CHECK-BE-NEXT:    vrev64.32 q0, q1
433; CHECK-BE-NEXT:    add sp, #4
434; CHECK-BE-NEXT:    bx lr
435entry:
436  %c = icmp sgt <4 x i32> %a, zeroinitializer
437  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> undef)
438  %sext = sext <4 x i16> %l to <4 x i32>
439  ret <4 x i32> %sext
440}
441
; Sign-extending masked load of <4 x i16> (alignment 2) where %a is a
; <4 x i16> argument used both as the compare input and as the passthru.
; A single vmovlb.s16 of %a is expected to serve both uses, followed by the
; predicated vldrht.s32 and a vpsel that merges in the passthru lanes.
442define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_other(ptr %dest, <4 x i16> %a) {
443; CHECK-LE-LABEL: sext16_masked_v4i32_align2_other:
444; CHECK-LE:       @ %bb.0: @ %entry
445; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
446; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
447; CHECK-LE-NEXT:    vldrht.s32 q1, [r0]
448; CHECK-LE-NEXT:    vpsel q0, q1, q0
449; CHECK-LE-NEXT:    bx lr
450;
451; CHECK-BE-LABEL: sext16_masked_v4i32_align2_other:
452; CHECK-BE:       @ %bb.0: @ %entry
453; CHECK-BE-NEXT:    vrev64.32 q1, q0
454; CHECK-BE-NEXT:    vmovlb.s16 q0, q1
455; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
456; CHECK-BE-NEXT:    vldrht.s32 q1, [r0]
457; CHECK-BE-NEXT:    vpsel q1, q1, q0
458; CHECK-BE-NEXT:    vrev64.32 q0, q1
459; CHECK-BE-NEXT:    bx lr
460entry:
461  %c = icmp sgt <4 x i16> %a, zeroinitializer
462  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> %a)
463  %sext = sext <4 x i16> %l to <4 x i32>
464  ret <4 x i32> %sext
465}
466
; Pre-increment addressing: the masked load reads from %x + 4 and the function
; returns that same incremented pointer, so the expected load uses the
; writeback form "[r0, #4]!". The loaded vector is stored to %y with a plain
; vstrw; the big-endian run expects no vrev64 on the loaded value here.
467define arm_aapcs_vfpcc ptr @masked_v4i32_preinc(ptr %x, ptr %y, <4 x i32> %a) {
468; CHECK-LE-LABEL: masked_v4i32_preinc:
469; CHECK-LE:       @ %bb.0: @ %entry
470; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
471; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]!
472; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
473; CHECK-LE-NEXT:    bx lr
474;
475; CHECK-BE-LABEL: masked_v4i32_preinc:
476; CHECK-BE:       @ %bb.0: @ %entry
477; CHECK-BE-NEXT:    vrev64.32 q1, q0
478; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
479; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]!
480; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
481; CHECK-BE-NEXT:    bx lr
482entry:
483  %z = getelementptr inbounds i8, ptr %x, i32 4
484  %c = icmp sgt <4 x i32> %a, zeroinitializer
485  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef)
486  store <4 x i32> %0, ptr %y, align 4
487  ret ptr %z
488}
489
; Post-increment addressing: the masked load reads from the original %x while
; the function returns %x + 4, so the expected load uses the post-indexed form
; "[r0], #4". The loaded vector is stored to %y with a plain vstrw.
490define arm_aapcs_vfpcc ptr @masked_v4i32_postinc(ptr %x, ptr %y, <4 x i32> %a) {
491; CHECK-LE-LABEL: masked_v4i32_postinc:
492; CHECK-LE:       @ %bb.0: @ %entry
493; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
494; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0], #4
495; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
496; CHECK-LE-NEXT:    bx lr
497;
498; CHECK-BE-LABEL: masked_v4i32_postinc:
499; CHECK-BE:       @ %bb.0: @ %entry
500; CHECK-BE-NEXT:    vrev64.32 q1, q0
501; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
502; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0], #4
503; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
504; CHECK-BE-NEXT:    bx lr
505entry:
506  %z = getelementptr inbounds i8, ptr %x, i32 4
507  %c = icmp sgt <4 x i32> %a, zeroinitializer
508  %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %x, i32 4, <4 x i1> %c, <4 x i32> undef)
509  store <4 x i32> %0, ptr %y, align 4
510  ret ptr %z
511}
512
; Masked load of <8 x i16> with a zero passthru; the mask is the signed
; lane-wise test %a > 0. Expected output is a single VPT-predicated vldrh,
; wrapped in vrev64.16 for the big-endian run.
; NOTE(review): the function name says "align4" but the intrinsic call below
; passes alignment 2 - confirm which one is intended.
513define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_zero(ptr %dest, <8 x i16> %a) {
514; CHECK-LE-LABEL: masked_v8i16_align4_zero:
515; CHECK-LE:       @ %bb.0: @ %entry
516; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
517; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
518; CHECK-LE-NEXT:    bx lr
519;
520; CHECK-BE-LABEL: masked_v8i16_align4_zero:
521; CHECK-BE:       @ %bb.0: @ %entry
522; CHECK-BE-NEXT:    vrev64.16 q1, q0
523; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
524; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
525; CHECK-BE-NEXT:    vrev64.16 q0, q1
526; CHECK-BE-NEXT:    bx lr
527entry:
528  %c = icmp sgt <8 x i16> %a, zeroinitializer
529  %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x i16> zeroinitializer)
530  ret <8 x i16> %l
531}
532
; Masked load of <8 x i16> with alignment 2 and an undef passthru. Expected
; output is a single VPT-predicated vldrh, wrapped in vrev64.16 for the
; big-endian run.
533define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align2_undef(ptr %dest, <8 x i16> %a) {
534; CHECK-LE-LABEL: masked_v8i16_align2_undef:
535; CHECK-LE:       @ %bb.0: @ %entry
536; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
537; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
538; CHECK-LE-NEXT:    bx lr
539;
540; CHECK-BE-LABEL: masked_v8i16_align2_undef:
541; CHECK-BE:       @ %bb.0: @ %entry
542; CHECK-BE-NEXT:    vrev64.16 q1, q0
543; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
544; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
545; CHECK-BE-NEXT:    vrev64.16 q0, q1
546; CHECK-BE-NEXT:    bx lr
547entry:
548  %c = icmp sgt <8 x i16> %a, zeroinitializer
549  %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i16> undef)
550  ret <8 x i16> %l
551}
552
; Masked load of <8 x i16> with alignment 1 and an undef passthru. The
; expected output is the scalarised form for eight lanes: the predicate is
; read with "vmrs r1, p0", one bit per lane is taken from the even predicate
; bit positions 0..14 and packed into the low byte of r2, and each lane is
; filled by a conditional ldrh + vmov.16 pair at byte offsets 0..14.
; The big-endian run packs the predicate bits in the opposite order (14 down
; to 0) and tests them with the mirrored shift amounts.
553define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align1_undef(ptr %dest, <8 x i16> %a) {
554; CHECK-LE-LABEL: masked_v8i16_align1_undef:
555; CHECK-LE:       @ %bb.0: @ %entry
556; CHECK-LE-NEXT:    .pad #4
557; CHECK-LE-NEXT:    sub sp, #4
558; CHECK-LE-NEXT:    vcmp.s16 gt, q0, zr
559; CHECK-LE-NEXT:    @ implicit-def: $q0
560; CHECK-LE-NEXT:    vmrs r1, p0
561; CHECK-LE-NEXT:    and r2, r1, #1
562; CHECK-LE-NEXT:    rsbs r3, r2, #0
563; CHECK-LE-NEXT:    movs r2, #0
564; CHECK-LE-NEXT:    bfi r2, r3, #0, #1
565; CHECK-LE-NEXT:    ubfx r3, r1, #2, #1
566; CHECK-LE-NEXT:    rsbs r3, r3, #0
567; CHECK-LE-NEXT:    bfi r2, r3, #1, #1
568; CHECK-LE-NEXT:    ubfx r3, r1, #4, #1
569; CHECK-LE-NEXT:    rsbs r3, r3, #0
570; CHECK-LE-NEXT:    bfi r2, r3, #2, #1
571; CHECK-LE-NEXT:    ubfx r3, r1, #6, #1
572; CHECK-LE-NEXT:    rsbs r3, r3, #0
573; CHECK-LE-NEXT:    bfi r2, r3, #3, #1
574; CHECK-LE-NEXT:    ubfx r3, r1, #8, #1
575; CHECK-LE-NEXT:    rsbs r3, r3, #0
576; CHECK-LE-NEXT:    bfi r2, r3, #4, #1
577; CHECK-LE-NEXT:    ubfx r3, r1, #10, #1
578; CHECK-LE-NEXT:    rsbs r3, r3, #0
579; CHECK-LE-NEXT:    bfi r2, r3, #5, #1
580; CHECK-LE-NEXT:    ubfx r3, r1, #12, #1
581; CHECK-LE-NEXT:    ubfx r1, r1, #14, #1
582; CHECK-LE-NEXT:    rsbs r3, r3, #0
583; CHECK-LE-NEXT:    bfi r2, r3, #6, #1
584; CHECK-LE-NEXT:    rsbs r1, r1, #0
585; CHECK-LE-NEXT:    bfi r2, r1, #7, #1
586; CHECK-LE-NEXT:    uxtb r1, r2
587; CHECK-LE-NEXT:    lsls r2, r2, #31
588; CHECK-LE-NEXT:    itt ne
589; CHECK-LE-NEXT:    ldrhne r2, [r0]
590; CHECK-LE-NEXT:    vmovne.16 q0[0], r2
591; CHECK-LE-NEXT:    lsls r2, r1, #30
592; CHECK-LE-NEXT:    itt mi
593; CHECK-LE-NEXT:    ldrhmi r2, [r0, #2]
594; CHECK-LE-NEXT:    vmovmi.16 q0[1], r2
595; CHECK-LE-NEXT:    lsls r2, r1, #29
596; CHECK-LE-NEXT:    itt mi
597; CHECK-LE-NEXT:    ldrhmi r2, [r0, #4]
598; CHECK-LE-NEXT:    vmovmi.16 q0[2], r2
599; CHECK-LE-NEXT:    lsls r2, r1, #28
600; CHECK-LE-NEXT:    itt mi
601; CHECK-LE-NEXT:    ldrhmi r2, [r0, #6]
602; CHECK-LE-NEXT:    vmovmi.16 q0[3], r2
603; CHECK-LE-NEXT:    lsls r2, r1, #27
604; CHECK-LE-NEXT:    itt mi
605; CHECK-LE-NEXT:    ldrhmi r2, [r0, #8]
606; CHECK-LE-NEXT:    vmovmi.16 q0[4], r2
607; CHECK-LE-NEXT:    lsls r2, r1, #26
608; CHECK-LE-NEXT:    itt mi
609; CHECK-LE-NEXT:    ldrhmi r2, [r0, #10]
610; CHECK-LE-NEXT:    vmovmi.16 q0[5], r2
611; CHECK-LE-NEXT:    lsls r2, r1, #25
612; CHECK-LE-NEXT:    itt mi
613; CHECK-LE-NEXT:    ldrhmi r2, [r0, #12]
614; CHECK-LE-NEXT:    vmovmi.16 q0[6], r2
615; CHECK-LE-NEXT:    lsls r1, r1, #24
616; CHECK-LE-NEXT:    itt mi
617; CHECK-LE-NEXT:    ldrhmi r0, [r0, #14]
618; CHECK-LE-NEXT:    vmovmi.16 q0[7], r0
619; CHECK-LE-NEXT:    add sp, #4
620; CHECK-LE-NEXT:    bx lr
621;
622; CHECK-BE-LABEL: masked_v8i16_align1_undef:
623; CHECK-BE:       @ %bb.0: @ %entry
624; CHECK-BE-NEXT:    .pad #4
625; CHECK-BE-NEXT:    sub sp, #4
626; CHECK-BE-NEXT:    vrev64.16 q1, q0
627; CHECK-BE-NEXT:    vcmp.s16 gt, q1, zr
628; CHECK-BE-NEXT:    @ implicit-def: $q1
629; CHECK-BE-NEXT:    vmrs r1, p0
630; CHECK-BE-NEXT:    ubfx r2, r1, #14, #1
631; CHECK-BE-NEXT:    rsbs r3, r2, #0
632; CHECK-BE-NEXT:    movs r2, #0
633; CHECK-BE-NEXT:    bfi r2, r3, #0, #1
634; CHECK-BE-NEXT:    ubfx r3, r1, #12, #1
635; CHECK-BE-NEXT:    rsbs r3, r3, #0
636; CHECK-BE-NEXT:    bfi r2, r3, #1, #1
637; CHECK-BE-NEXT:    ubfx r3, r1, #10, #1
638; CHECK-BE-NEXT:    rsbs r3, r3, #0
639; CHECK-BE-NEXT:    bfi r2, r3, #2, #1
640; CHECK-BE-NEXT:    ubfx r3, r1, #8, #1
641; CHECK-BE-NEXT:    rsbs r3, r3, #0
642; CHECK-BE-NEXT:    bfi r2, r3, #3, #1
643; CHECK-BE-NEXT:    ubfx r3, r1, #6, #1
644; CHECK-BE-NEXT:    rsbs r3, r3, #0
645; CHECK-BE-NEXT:    bfi r2, r3, #4, #1
646; CHECK-BE-NEXT:    ubfx r3, r1, #4, #1
647; CHECK-BE-NEXT:    rsbs r3, r3, #0
648; CHECK-BE-NEXT:    bfi r2, r3, #5, #1
649; CHECK-BE-NEXT:    ubfx r3, r1, #2, #1
650; CHECK-BE-NEXT:    and r1, r1, #1
651; CHECK-BE-NEXT:    rsbs r3, r3, #0
652; CHECK-BE-NEXT:    bfi r2, r3, #6, #1
653; CHECK-BE-NEXT:    rsbs r1, r1, #0
654; CHECK-BE-NEXT:    bfi r2, r1, #7, #1
655; CHECK-BE-NEXT:    uxtb r1, r2
656; CHECK-BE-NEXT:    lsls r2, r2, #24
657; CHECK-BE-NEXT:    itt mi
658; CHECK-BE-NEXT:    ldrhmi r2, [r0]
659; CHECK-BE-NEXT:    vmovmi.16 q1[0], r2
660; CHECK-BE-NEXT:    lsls r2, r1, #25
661; CHECK-BE-NEXT:    itt mi
662; CHECK-BE-NEXT:    ldrhmi r2, [r0, #2]
663; CHECK-BE-NEXT:    vmovmi.16 q1[1], r2
664; CHECK-BE-NEXT:    lsls r2, r1, #26
665; CHECK-BE-NEXT:    itt mi
666; CHECK-BE-NEXT:    ldrhmi r2, [r0, #4]
667; CHECK-BE-NEXT:    vmovmi.16 q1[2], r2
668; CHECK-BE-NEXT:    lsls r2, r1, #27
669; CHECK-BE-NEXT:    itt mi
670; CHECK-BE-NEXT:    ldrhmi r2, [r0, #6]
671; CHECK-BE-NEXT:    vmovmi.16 q1[3], r2
672; CHECK-BE-NEXT:    lsls r2, r1, #28
673; CHECK-BE-NEXT:    itt mi
674; CHECK-BE-NEXT:    ldrhmi r2, [r0, #8]
675; CHECK-BE-NEXT:    vmovmi.16 q1[4], r2
676; CHECK-BE-NEXT:    lsls r2, r1, #29
677; CHECK-BE-NEXT:    itt mi
678; CHECK-BE-NEXT:    ldrhmi r2, [r0, #10]
679; CHECK-BE-NEXT:    vmovmi.16 q1[5], r2
680; CHECK-BE-NEXT:    lsls r2, r1, #30
681; CHECK-BE-NEXT:    itt mi
682; CHECK-BE-NEXT:    ldrhmi r2, [r0, #12]
683; CHECK-BE-NEXT:    vmovmi.16 q1[6], r2
684; CHECK-BE-NEXT:    lsls r1, r1, #31
685; CHECK-BE-NEXT:    itt ne
686; CHECK-BE-NEXT:    ldrhne r0, [r0, #14]
687; CHECK-BE-NEXT:    vmovne.16 q1[7], r0
688; CHECK-BE-NEXT:    vrev64.16 q0, q1
689; CHECK-BE-NEXT:    add sp, #4
690; CHECK-BE-NEXT:    bx lr
691entry:
692  %c = icmp sgt <8 x i16> %a, zeroinitializer
693  %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i16> undef)
694  ret <8 x i16> %l
695}
696
; Masked load of <8 x i16> where the passthru is the live value %a (the same
; vector that feeds the compare). The predicated vldrh is expected to load
; into a second register, followed by a vpsel that merges the loaded lanes
; with %a.
; NOTE(review): the function name says "align4" but the intrinsic call below
; passes alignment 2 - confirm which one is intended.
697define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_other(ptr %dest, <8 x i16> %a) {
698; CHECK-LE-LABEL: masked_v8i16_align4_other:
699; CHECK-LE:       @ %bb.0: @ %entry
700; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
701; CHECK-LE-NEXT:    vldrht.u16 q1, [r0]
702; CHECK-LE-NEXT:    vpsel q0, q1, q0
703; CHECK-LE-NEXT:    bx lr
704;
705; CHECK-BE-LABEL: masked_v8i16_align4_other:
706; CHECK-BE:       @ %bb.0: @ %entry
707; CHECK-BE-NEXT:    vrev64.16 q1, q0
708; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
709; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
710; CHECK-BE-NEXT:    vpsel q1, q0, q1
711; CHECK-BE-NEXT:    vrev64.16 q0, q1
712; CHECK-BE-NEXT:    bx lr
713entry:
714  %c = icmp sgt <8 x i16> %a, zeroinitializer
715  %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x i16> %a)
716  ret <8 x i16> %l
717}
718
; Masked load of <8 x i8> (alignment 1, zero passthru) sign-extended to
; <8 x i16>. %a is an <8 x i8> argument, so the expected output first widens
; it with vmovlb.s8 for the signed compare and then issues one predicated
; widening load, vldrbt.s16.
719define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_zero(ptr %dest, <8 x i8> %a) {
720; CHECK-LE-LABEL: sext8_masked_v8i16_align1_zero:
721; CHECK-LE:       @ %bb.0: @ %entry
722; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
723; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
724; CHECK-LE-NEXT:    vldrbt.s16 q0, [r0]
725; CHECK-LE-NEXT:    bx lr
726;
727; CHECK-BE-LABEL: sext8_masked_v8i16_align1_zero:
728; CHECK-BE:       @ %bb.0: @ %entry
729; CHECK-BE-NEXT:    vrev64.16 q1, q0
730; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
731; CHECK-BE-NEXT:    vpt.s16 gt, q0, zr
732; CHECK-BE-NEXT:    vldrbt.s16 q1, [r0]
733; CHECK-BE-NEXT:    vrev64.16 q0, q1
734; CHECK-BE-NEXT:    bx lr
735entry:
736  %c = icmp sgt <8 x i8> %a, zeroinitializer
737  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer)
738  %ext = sext <8 x i8> %l to <8 x i16>
739  ret <8 x i16> %ext
740}
741
; Sign-extending masked load of <8 x i8> to <8 x i16> with alignment 1 and an
; undef passthru. The expected output matches the zero-passthru variant:
; vmovlb.s8 on %a for the compare, then a predicated vldrbt.s16.
742define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_undef(ptr %dest, <8 x i8> %a) {
743; CHECK-LE-LABEL: sext8_masked_v8i16_align1_undef:
744; CHECK-LE:       @ %bb.0: @ %entry
745; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
746; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
747; CHECK-LE-NEXT:    vldrbt.s16 q0, [r0]
748; CHECK-LE-NEXT:    bx lr
749;
750; CHECK-BE-LABEL: sext8_masked_v8i16_align1_undef:
751; CHECK-BE:       @ %bb.0: @ %entry
752; CHECK-BE-NEXT:    vrev64.16 q1, q0
753; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
754; CHECK-BE-NEXT:    vpt.s16 gt, q0, zr
755; CHECK-BE-NEXT:    vldrbt.s16 q1, [r0]
756; CHECK-BE-NEXT:    vrev64.16 q0, q1
757; CHECK-BE-NEXT:    bx lr
758entry:
759  %c = icmp sgt <8 x i8> %a, zeroinitializer
760  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> undef)
761  %ext = sext <8 x i8> %l to <8 x i16>
762  ret <8 x i16> %ext
763}
764
; Sign-extending masked load of <8 x i8> to <8 x i16> (alignment 1) where %a
; is both the compare input and the passthru. A single vmovlb.s8 of %a is
; expected to serve both uses, followed by the predicated vldrbt.s16 and a
; vpsel that merges in the passthru lanes.
765define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_other(ptr %dest, <8 x i8> %a) {
766; CHECK-LE-LABEL: sext8_masked_v8i16_align1_other:
767; CHECK-LE:       @ %bb.0: @ %entry
768; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
769; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
770; CHECK-LE-NEXT:    vldrbt.s16 q1, [r0]
771; CHECK-LE-NEXT:    vpsel q0, q1, q0
772; CHECK-LE-NEXT:    bx lr
773;
774; CHECK-BE-LABEL: sext8_masked_v8i16_align1_other:
775; CHECK-BE:       @ %bb.0: @ %entry
776; CHECK-BE-NEXT:    vrev64.16 q1, q0
777; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
778; CHECK-BE-NEXT:    vpt.s16 gt, q0, zr
779; CHECK-BE-NEXT:    vldrbt.s16 q1, [r0]
780; CHECK-BE-NEXT:    vpsel q1, q1, q0
781; CHECK-BE-NEXT:    vrev64.16 q0, q1
782; CHECK-BE-NEXT:    bx lr
783entry:
784  %c = icmp sgt <8 x i8> %a, zeroinitializer
785  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> %a)
786  %ext = sext <8 x i8> %l to <8 x i16>
787  ret <8 x i16> %ext
788}
789
; Masked load of <4 x i8> (alignment 1, zero passthru) sign-extended to
; <4 x i32>. %a is a <4 x i8> argument, so the expected output widens it in
; two steps (vmovlb.s8 then vmovlb.s16) for the signed compare and then issues
; one predicated widening load, vldrbt.s32.
790define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_zero(ptr %dest, <4 x i8> %a) {
791; CHECK-LE-LABEL: sext8_masked_v4i32_align1_zero:
792; CHECK-LE:       @ %bb.0: @ %entry
793; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
794; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
795; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
796; CHECK-LE-NEXT:    vldrbt.s32 q0, [r0]
797; CHECK-LE-NEXT:    bx lr
798;
799; CHECK-BE-LABEL: sext8_masked_v4i32_align1_zero:
800; CHECK-BE:       @ %bb.0: @ %entry
801; CHECK-BE-NEXT:    vrev64.32 q1, q0
802; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
803; CHECK-BE-NEXT:    vmovlb.s16 q0, q0
804; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
805; CHECK-BE-NEXT:    vldrbt.s32 q1, [r0]
806; CHECK-BE-NEXT:    vrev64.32 q0, q1
807; CHECK-BE-NEXT:    bx lr
808entry:
809  %c = icmp sgt <4 x i8> %a, zeroinitializer
810  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer)
811  %ext = sext <4 x i8> %l to <4 x i32>
812  ret <4 x i32> %ext
813}
814
; Sign-extending masked load: <4 x i8> loaded with alignment 1 and widened to
; <4 x i32>, with an undef passthru. The mask is (%a > 0, signed).
; Expected code: vmovlb.s8 + vmovlb.s16 widen %a for the compare, then a single
; predicated vldrbt.s32 with no select. Big-endian adds vrev64.32 on the input
; and result.
815define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_undef(ptr %dest, <4 x i8> %a) {
816; CHECK-LE-LABEL: sext8_masked_v4i32_align1_undef:
817; CHECK-LE:       @ %bb.0: @ %entry
818; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
819; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
820; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
821; CHECK-LE-NEXT:    vldrbt.s32 q0, [r0]
822; CHECK-LE-NEXT:    bx lr
823;
824; CHECK-BE-LABEL: sext8_masked_v4i32_align1_undef:
825; CHECK-BE:       @ %bb.0: @ %entry
826; CHECK-BE-NEXT:    vrev64.32 q1, q0
827; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
828; CHECK-BE-NEXT:    vmovlb.s16 q0, q0
829; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
830; CHECK-BE-NEXT:    vldrbt.s32 q1, [r0]
831; CHECK-BE-NEXT:    vrev64.32 q0, q1
832; CHECK-BE-NEXT:    bx lr
833entry:
834  %c = icmp sgt <4 x i8> %a, zeroinitializer
835  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> undef)
836  %ext = sext <4 x i8> %l to <4 x i32>
837  ret <4 x i32> %ext
838}
839
; Sign-extending masked load: <4 x i8> loaded with alignment 1 and widened to
; <4 x i32>. The mask is (%a > 0, signed) and the passthru is %a itself.
; Expected code: vmovlb.s8 + vmovlb.s16 widen %a, a predicated vldrbt.s32 does
; the extending load, and vpsel merges the widened passthru into the result.
; Big-endian adds vrev64.32 on the input and result.
840define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_other(ptr %dest, <4 x i8> %a) {
841; CHECK-LE-LABEL: sext8_masked_v4i32_align1_other:
842; CHECK-LE:       @ %bb.0: @ %entry
843; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
844; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
845; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
846; CHECK-LE-NEXT:    vldrbt.s32 q1, [r0]
847; CHECK-LE-NEXT:    vpsel q0, q1, q0
848; CHECK-LE-NEXT:    bx lr
849;
850; CHECK-BE-LABEL: sext8_masked_v4i32_align1_other:
851; CHECK-BE:       @ %bb.0: @ %entry
852; CHECK-BE-NEXT:    vrev64.32 q1, q0
853; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
854; CHECK-BE-NEXT:    vmovlb.s16 q0, q0
855; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
856; CHECK-BE-NEXT:    vldrbt.s32 q1, [r0]
857; CHECK-BE-NEXT:    vpsel q1, q1, q0
858; CHECK-BE-NEXT:    vrev64.32 q0, q1
859; CHECK-BE-NEXT:    bx lr
860entry:
861  %c = icmp sgt <4 x i8> %a, zeroinitializer
862  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> %a)
863  %ext = sext <4 x i8> %l to <4 x i32>
864  ret <4 x i32> %ext
865}
866
; Zero-extending masked load: <4 x i8> loaded with alignment 1 and widened to
; <4 x i32>, with a zero passthru. The mask is (%a > 0, signed), so %a is still
; sign-extended (vmovlb.s8, vmovlb.s16) for the compare.
; Expected code: a single predicated vldrbt.u32 with no select. Big-endian adds
; vrev64.32 on the input and result.
867define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_zero(ptr %dest, <4 x i8> %a) {
868; CHECK-LE-LABEL: zext8_masked_v4i32_align1_zero:
869; CHECK-LE:       @ %bb.0: @ %entry
870; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
871; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
872; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
873; CHECK-LE-NEXT:    vldrbt.u32 q0, [r0]
874; CHECK-LE-NEXT:    bx lr
875;
876; CHECK-BE-LABEL: zext8_masked_v4i32_align1_zero:
877; CHECK-BE:       @ %bb.0: @ %entry
878; CHECK-BE-NEXT:    vrev64.32 q1, q0
879; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
880; CHECK-BE-NEXT:    vmovlb.s16 q0, q0
881; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
882; CHECK-BE-NEXT:    vldrbt.u32 q1, [r0]
883; CHECK-BE-NEXT:    vrev64.32 q0, q1
884; CHECK-BE-NEXT:    bx lr
885entry:
886  %c = icmp sgt <4 x i8> %a, zeroinitializer
887  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer)
888  %ext = zext <4 x i8> %l to <4 x i32>
889  ret <4 x i32> %ext
890}
891
; Zero-extending masked load: <4 x i8> loaded with alignment 1 and widened to
; <4 x i32>, with an undef passthru. The mask is (%a > 0, signed).
; Expected code: vmovlb.s8 + vmovlb.s16 widen %a for the compare, then a single
; predicated vldrbt.u32 with no select. Big-endian adds vrev64.32 on the input
; and result.
892define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_undef(ptr %dest, <4 x i8> %a) {
893; CHECK-LE-LABEL: zext8_masked_v4i32_align1_undef:
894; CHECK-LE:       @ %bb.0: @ %entry
895; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
896; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
897; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
898; CHECK-LE-NEXT:    vldrbt.u32 q0, [r0]
899; CHECK-LE-NEXT:    bx lr
900;
901; CHECK-BE-LABEL: zext8_masked_v4i32_align1_undef:
902; CHECK-BE:       @ %bb.0: @ %entry
903; CHECK-BE-NEXT:    vrev64.32 q1, q0
904; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
905; CHECK-BE-NEXT:    vmovlb.s16 q0, q0
906; CHECK-BE-NEXT:    vpt.s32 gt, q0, zr
907; CHECK-BE-NEXT:    vldrbt.u32 q1, [r0]
908; CHECK-BE-NEXT:    vrev64.32 q0, q1
909; CHECK-BE-NEXT:    bx lr
910entry:
911  %c = icmp sgt <4 x i8> %a, zeroinitializer
912  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> undef)
913  %ext = zext <4 x i8> %l to <4 x i32>
914  ret <4 x i32> %ext
915}
916
; Zero-extending masked load: <4 x i8> loaded with alignment 1 and widened to
; <4 x i32>. The mask is (%a > 0, signed) and the passthru is %a itself.
; Expected code: the passthru is zero-extended by masking with #0xff
; (vmov.i32 + vand), while a separate sign-extended copy of %a
; (vmovlb.s8, vmovlb.s16) feeds the compare. A predicated vldrbt.u32 does the
; load and vpsel merges in the masked passthru. Big-endian adds vrev64.32 on
; the input and result.
917define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_other(ptr %dest, <4 x i8> %a) {
918; CHECK-LE-LABEL: zext8_masked_v4i32_align1_other:
919; CHECK-LE:       @ %bb.0: @ %entry
920; CHECK-LE-NEXT:    vmov.i32 q1, #0xff
921; CHECK-LE-NEXT:    vand q1, q0, q1
922; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
923; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
924; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
925; CHECK-LE-NEXT:    vldrbt.u32 q0, [r0]
926; CHECK-LE-NEXT:    vpsel q0, q0, q1
927; CHECK-LE-NEXT:    bx lr
928;
929; CHECK-BE-LABEL: zext8_masked_v4i32_align1_other:
930; CHECK-BE:       @ %bb.0: @ %entry
931; CHECK-BE-NEXT:    vmov.i32 q1, #0xff
932; CHECK-BE-NEXT:    vrev64.32 q2, q0
933; CHECK-BE-NEXT:    vand q0, q2, q1
934; CHECK-BE-NEXT:    vmovlb.s8 q1, q2
935; CHECK-BE-NEXT:    vmovlb.s16 q1, q1
936; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
937; CHECK-BE-NEXT:    vldrbt.u32 q1, [r0]
938; CHECK-BE-NEXT:    vpsel q1, q1, q0
939; CHECK-BE-NEXT:    vrev64.32 q0, q1
940; CHECK-BE-NEXT:    bx lr
941entry:
942  %c = icmp sgt <4 x i8> %a, zeroinitializer
943  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> %a)
944  %ext = zext <4 x i8> %l to <4 x i32>
945  ret <4 x i32> %ext
946}
947
; Zero-extending masked load: <8 x i8> loaded with alignment 1 and widened to
; <8 x i16>, with a zero passthru. The mask is (%a > 0, signed), so %a is
; sign-extended with vmovlb.s8 for the compare.
; Expected code: a single predicated vldrbt.u16 with no select. Big-endian adds
; vrev64.16 on the input and result.
948define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_zero(ptr %dest, <8 x i8> %a) {
949; CHECK-LE-LABEL: zext8_masked_v8i16_align1_zero:
950; CHECK-LE:       @ %bb.0: @ %entry
951; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
952; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
953; CHECK-LE-NEXT:    vldrbt.u16 q0, [r0]
954; CHECK-LE-NEXT:    bx lr
955;
956; CHECK-BE-LABEL: zext8_masked_v8i16_align1_zero:
957; CHECK-BE:       @ %bb.0: @ %entry
958; CHECK-BE-NEXT:    vrev64.16 q1, q0
959; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
960; CHECK-BE-NEXT:    vpt.s16 gt, q0, zr
961; CHECK-BE-NEXT:    vldrbt.u16 q1, [r0]
962; CHECK-BE-NEXT:    vrev64.16 q0, q1
963; CHECK-BE-NEXT:    bx lr
964entry:
965  %c = icmp sgt <8 x i8> %a, zeroinitializer
966  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer)
967  %ext = zext <8 x i8> %l to <8 x i16>
968  ret <8 x i16> %ext
969}
970
; Zero-extending masked load: <8 x i8> loaded with alignment 1 and widened to
; <8 x i16>, with an undef passthru. The mask is (%a > 0, signed).
; Expected code: vmovlb.s8 widens %a for the compare, then a single predicated
; vldrbt.u16 with no select. Big-endian adds vrev64.16 on the input and result.
971define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_undef(ptr %dest, <8 x i8> %a) {
972; CHECK-LE-LABEL: zext8_masked_v8i16_align1_undef:
973; CHECK-LE:       @ %bb.0: @ %entry
974; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
975; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
976; CHECK-LE-NEXT:    vldrbt.u16 q0, [r0]
977; CHECK-LE-NEXT:    bx lr
978;
979; CHECK-BE-LABEL: zext8_masked_v8i16_align1_undef:
980; CHECK-BE:       @ %bb.0: @ %entry
981; CHECK-BE-NEXT:    vrev64.16 q1, q0
982; CHECK-BE-NEXT:    vmovlb.s8 q0, q1
983; CHECK-BE-NEXT:    vpt.s16 gt, q0, zr
984; CHECK-BE-NEXT:    vldrbt.u16 q1, [r0]
985; CHECK-BE-NEXT:    vrev64.16 q0, q1
986; CHECK-BE-NEXT:    bx lr
987entry:
988  %c = icmp sgt <8 x i8> %a, zeroinitializer
989  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> undef)
990  %ext = zext <8 x i8> %l to <8 x i16>
991  ret <8 x i16> %ext
992}
993
; Zero-extending masked load: <8 x i8> loaded with alignment 1 and widened to
; <8 x i16>. The mask is (%a > 0, signed) and the passthru is %a itself.
; Expected code: the passthru is zero-extended with vmovlb.u8 while a
; sign-extended copy (vmovlb.s8) feeds the compare. A predicated vldrbt.u16
; does the load and vpsel merges in the zero-extended passthru. Big-endian adds
; vrev64.16 on the input and result.
994define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_other(ptr %dest, <8 x i8> %a) {
995; CHECK-LE-LABEL: zext8_masked_v8i16_align1_other:
996; CHECK-LE:       @ %bb.0: @ %entry
997; CHECK-LE-NEXT:    vmovlb.u8 q1, q0
998; CHECK-LE-NEXT:    vmovlb.s8 q0, q0
999; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
1000; CHECK-LE-NEXT:    vldrbt.u16 q0, [r0]
1001; CHECK-LE-NEXT:    vpsel q0, q0, q1
1002; CHECK-LE-NEXT:    bx lr
1003;
1004; CHECK-BE-LABEL: zext8_masked_v8i16_align1_other:
1005; CHECK-BE:       @ %bb.0: @ %entry
1006; CHECK-BE-NEXT:    vrev64.16 q1, q0
1007; CHECK-BE-NEXT:    vmovlb.u8 q0, q1
1008; CHECK-BE-NEXT:    vmovlb.s8 q1, q1
1009; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
1010; CHECK-BE-NEXT:    vldrbt.u16 q1, [r0]
1011; CHECK-BE-NEXT:    vpsel q1, q1, q0
1012; CHECK-BE-NEXT:    vrev64.16 q0, q1
1013; CHECK-BE-NEXT:    bx lr
1014entry:
1015  %c = icmp sgt <8 x i8> %a, zeroinitializer
1016  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> %a)
1017  %ext = zext <8 x i8> %l to <8 x i16>
1018  ret <8 x i16> %ext
1019}
1020
; Pre-increment addressing: a masked <8 x i16> load (alignment 4, undef
; passthru) from %x + 4 bytes, stored to %y, returning the incremented pointer.
; The mask is (%a > 0, signed). Expected code: the address update is folded
; into the predicated load as a pre-indexed writeback, vldrht.u16 [r0, #4]!.
; NOTE(review): unlike the other pre/post-increment tests in this file, this
; function is not declared arm_aapcs_vfpcc. That is why the expected output
; assembles %a from r2/r3 and the stack (vldr d1, [sp] / vmov d0, ...) before
; the compare. Confirm whether the default calling convention is intentional.
1021define ptr @masked_v8i16_preinc(ptr %x, ptr %y, <8 x i16> %a) {
1022; CHECK-LE-LABEL: masked_v8i16_preinc:
1023; CHECK-LE:       @ %bb.0: @ %entry
1024; CHECK-LE-NEXT:    vldr d1, [sp]
1025; CHECK-LE-NEXT:    vmov d0, r2, r3
1026; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
1027; CHECK-LE-NEXT:    vldrht.u16 q0, [r0, #4]!
1028; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1029; CHECK-LE-NEXT:    bx lr
1030;
1031; CHECK-BE-LABEL: masked_v8i16_preinc:
1032; CHECK-BE:       @ %bb.0: @ %entry
1033; CHECK-BE-NEXT:    vldr d1, [sp]
1034; CHECK-BE-NEXT:    vmov d0, r3, r2
1035; CHECK-BE-NEXT:    vrev64.16 q1, q0
1036; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
1037; CHECK-BE-NEXT:    vldrht.u16 q0, [r0, #4]!
1038; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
1039; CHECK-BE-NEXT:    bx lr
1040entry:
1041  %z = getelementptr inbounds i8, ptr %x, i32 4
1042  %c = icmp sgt <8 x i16> %a, zeroinitializer
1043  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 4, <8 x i1> %c, <8 x i16> undef)
1044  store <8 x i16> %0, ptr %y, align 4
1045  ret ptr %z
1046}
1047
; Post-increment addressing: a masked <8 x i16> load (alignment 4, undef
; passthru) from %x, stored to %y, returning %x + 4 bytes.
; The mask is (%a > 0, signed). Expected code: the pointer increment is folded
; into the predicated load as a post-indexed access, vldrht.u16 [r0], #4.
; Big-endian reverses the mask input with vrev64.16 and stores with vstrh.16
; rather than vstrw.32.
1048define arm_aapcs_vfpcc ptr @masked_v8i16_postinc(ptr %x, ptr %y, <8 x i16> %a) {
1049; CHECK-LE-LABEL: masked_v8i16_postinc:
1050; CHECK-LE:       @ %bb.0: @ %entry
1051; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
1052; CHECK-LE-NEXT:    vldrht.u16 q0, [r0], #4
1053; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1054; CHECK-LE-NEXT:    bx lr
1055;
1056; CHECK-BE-LABEL: masked_v8i16_postinc:
1057; CHECK-BE:       @ %bb.0: @ %entry
1058; CHECK-BE-NEXT:    vrev64.16 q1, q0
1059; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
1060; CHECK-BE-NEXT:    vldrht.u16 q0, [r0], #4
1061; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
1062; CHECK-BE-NEXT:    bx lr
1063entry:
1064  %z = getelementptr inbounds i8, ptr %x, i32 4
1065  %c = icmp sgt <8 x i16> %a, zeroinitializer
1066  %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %x, i32 4, <8 x i1> %c, <8 x i16> undef)
1067  store <8 x i16> %0, ptr %y, align 4
1068  ret ptr %z
1069}
1070
1071
; Masked <16 x i8> load with a zero passthru; the mask is (%a > 0, signed).
; Expected code: a VPT block containing one predicated vldrbt.u8, no select.
; Big-endian adds vrev64.8 on the input and result.
; NOTE(review): the function name says "align4" but the intrinsic call passes
; alignment 1. Confirm which is intended.
1072define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_zero(ptr %dest, <16 x i8> %a) {
1073; CHECK-LE-LABEL: masked_v16i8_align4_zero:
1074; CHECK-LE:       @ %bb.0: @ %entry
1075; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
1076; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
1077; CHECK-LE-NEXT:    bx lr
1078;
1079; CHECK-BE-LABEL: masked_v16i8_align4_zero:
1080; CHECK-BE:       @ %bb.0: @ %entry
1081; CHECK-BE-NEXT:    vrev64.8 q1, q0
1082; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
1083; CHECK-BE-NEXT:    vldrbt.u8 q1, [r0]
1084; CHECK-BE-NEXT:    vrev64.8 q0, q1
1085; CHECK-BE-NEXT:    bx lr
1086entry:
1087  %c = icmp sgt <16 x i8> %a, zeroinitializer
1088  %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> zeroinitializer)
1089  ret <16 x i8> %l
1090}
1091
; Masked <16 x i8> load with an undef passthru; the mask is (%a > 0, signed).
; Expected code: a VPT block containing one predicated vldrbt.u8, no select.
; Big-endian adds vrev64.8 on the input and result.
; NOTE(review): the function name says "align4" but the intrinsic call passes
; alignment 1. Confirm which is intended.
1092define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_undef(ptr %dest, <16 x i8> %a) {
1093; CHECK-LE-LABEL: masked_v16i8_align4_undef:
1094; CHECK-LE:       @ %bb.0: @ %entry
1095; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
1096; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
1097; CHECK-LE-NEXT:    bx lr
1098;
1099; CHECK-BE-LABEL: masked_v16i8_align4_undef:
1100; CHECK-BE:       @ %bb.0: @ %entry
1101; CHECK-BE-NEXT:    vrev64.8 q1, q0
1102; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
1103; CHECK-BE-NEXT:    vldrbt.u8 q1, [r0]
1104; CHECK-BE-NEXT:    vrev64.8 q0, q1
1105; CHECK-BE-NEXT:    bx lr
1106entry:
1107  %c = icmp sgt <16 x i8> %a, zeroinitializer
1108  %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> undef)
1109  ret <16 x i8> %l
1110}
1111
; Masked <16 x i8> load whose passthru is %a; the mask is (%a > 0, signed).
; Expected code: a predicated vldrbt.u8 followed by vpsel, which merges %a into
; the result. Big-endian adds vrev64.8 on the input and result.
; NOTE(review): the function name says "align4" but the intrinsic call passes
; alignment 1. Confirm which is intended.
1112define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_other(ptr %dest, <16 x i8> %a) {
1113; CHECK-LE-LABEL: masked_v16i8_align4_other:
1114; CHECK-LE:       @ %bb.0: @ %entry
1115; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
1116; CHECK-LE-NEXT:    vldrbt.u8 q1, [r0]
1117; CHECK-LE-NEXT:    vpsel q0, q1, q0
1118; CHECK-LE-NEXT:    bx lr
1119;
1120; CHECK-BE-LABEL: masked_v16i8_align4_other:
1121; CHECK-BE:       @ %bb.0: @ %entry
1122; CHECK-BE-NEXT:    vrev64.8 q1, q0
1123; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
1124; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
1125; CHECK-BE-NEXT:    vpsel q1, q0, q1
1126; CHECK-BE-NEXT:    vrev64.8 q0, q1
1127; CHECK-BE-NEXT:    bx lr
1128entry:
1129  %c = icmp sgt <16 x i8> %a, zeroinitializer
1130  %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> %a)
1131  ret <16 x i8> %l
1132}
1133
; Pre-increment addressing: a masked <16 x i8> load (alignment 4, undef
; passthru) from %x + 4 bytes, stored to %y, returning the incremented pointer.
; The mask is (%a > 0, signed). Expected code: the address update is folded
; into the predicated load as a pre-indexed writeback, vldrbt.u8 [r0, #4]!.
; Big-endian reverses the mask input with vrev64.8 and stores with vstrb.8
; rather than vstrw.32.
1134define arm_aapcs_vfpcc ptr @masked_v16i8_preinc(ptr %x, ptr %y, <16 x i8> %a) {
1135; CHECK-LE-LABEL: masked_v16i8_preinc:
1136; CHECK-LE:       @ %bb.0: @ %entry
1137; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
1138; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0, #4]!
1139; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1140; CHECK-LE-NEXT:    bx lr
1141;
1142; CHECK-BE-LABEL: masked_v16i8_preinc:
1143; CHECK-BE:       @ %bb.0: @ %entry
1144; CHECK-BE-NEXT:    vrev64.8 q1, q0
1145; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
1146; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0, #4]!
1147; CHECK-BE-NEXT:    vstrb.8 q0, [r1]
1148; CHECK-BE-NEXT:    bx lr
1149entry:
1150  %z = getelementptr inbounds i8, ptr %x, i32 4
1151  %c = icmp sgt <16 x i8> %a, zeroinitializer
1152  %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 4, <16 x i1> %c, <16 x i8> undef)
1153  store <16 x i8> %0, ptr %y, align 4
1154  ret ptr %z
1155}
1156
; Post-increment addressing: a masked <16 x i8> load (alignment 4, undef
; passthru) from %x, stored to %y, returning %x + 4 bytes.
; The mask is (%a > 0, signed). Expected code: the pointer increment is folded
; into the predicated load as a post-indexed access, vldrbt.u8 [r0], #4.
; Big-endian reverses the mask input with vrev64.8 and stores with vstrb.8
; rather than vstrw.32.
1157define arm_aapcs_vfpcc ptr @masked_v16i8_postinc(ptr %x, ptr %y, <16 x i8> %a) {
1158; CHECK-LE-LABEL: masked_v16i8_postinc:
1159; CHECK-LE:       @ %bb.0: @ %entry
1160; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
1161; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0], #4
1162; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1163; CHECK-LE-NEXT:    bx lr
1164;
1165; CHECK-BE-LABEL: masked_v16i8_postinc:
1166; CHECK-BE:       @ %bb.0: @ %entry
1167; CHECK-BE-NEXT:    vrev64.8 q1, q0
1168; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
1169; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0], #4
1170; CHECK-BE-NEXT:    vstrb.8 q0, [r1]
1171; CHECK-BE-NEXT:    bx lr
1172entry:
1173  %z = getelementptr inbounds i8, ptr %x, i32 4
1174  %c = icmp sgt <16 x i8> %a, zeroinitializer
1175  %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %x, i32 4, <16 x i1> %c, <16 x i8> undef)
1176  store <16 x i8> %0, ptr %y, align 4
1177  ret ptr %z
1178}
1179
1180
; Masked <4 x float> load, alignment 4, zero passthru. The mask comes from an
; integer compare (%a > 0, signed) on a <4 x i32> argument.
; Expected code: a VPT block containing one predicated vldrwt.u32, no select.
; Big-endian adds vrev64.32 on the input and result.
1181define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_zero(ptr %dest, <4 x i32> %a) {
1182; CHECK-LE-LABEL: masked_v4f32_align4_zero:
1183; CHECK-LE:       @ %bb.0: @ %entry
1184; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
1185; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
1186; CHECK-LE-NEXT:    bx lr
1187;
1188; CHECK-BE-LABEL: masked_v4f32_align4_zero:
1189; CHECK-BE:       @ %bb.0: @ %entry
1190; CHECK-BE-NEXT:    vrev64.32 q1, q0
1191; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
1192; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
1193; CHECK-BE-NEXT:    vrev64.32 q0, q1
1194; CHECK-BE-NEXT:    bx lr
1195entry:
1196  %c = icmp sgt <4 x i32> %a, zeroinitializer
1197  %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> zeroinitializer)
1198  ret <4 x float> %l
1199}
1200
; Masked <4 x float> load, alignment 4, undef passthru. The mask comes from an
; integer compare (%a > 0, signed) on a <4 x i32> argument.
; Expected code: a VPT block containing one predicated vldrwt.u32, no select.
; Big-endian adds vrev64.32 on the input and result.
1201define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_undef(ptr %dest, <4 x i32> %a) {
1202; CHECK-LE-LABEL: masked_v4f32_align4_undef:
1203; CHECK-LE:       @ %bb.0: @ %entry
1204; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
1205; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
1206; CHECK-LE-NEXT:    bx lr
1207;
1208; CHECK-BE-LABEL: masked_v4f32_align4_undef:
1209; CHECK-BE:       @ %bb.0: @ %entry
1210; CHECK-BE-NEXT:    vrev64.32 q1, q0
1211; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
1212; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
1213; CHECK-BE-NEXT:    vrev64.32 q0, q1
1214; CHECK-BE-NEXT:    bx lr
1215entry:
1216  %c = icmp sgt <4 x i32> %a, zeroinitializer
1217  %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> undef)
1218  ret <4 x float> %l
1219}
1220
; Masked <4 x float> load with alignment 1 and an undef passthru; the mask is
; (%a > 0, signed). With this alignment the expected output contains no
; predicated vector load. Instead the predicate is copied to a core register
; (vmrs from p0), packed into a 4-bit lane mask with ubfx/rsbs/bfi, and each
; lane is then loaded conditionally inside an IT block (ldr followed by vmov
; into the matching s-register). Big-endian extracts the predicate bits in the
; opposite order and reverses the input and result with vrev64.32.
1221define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align1_undef(ptr %dest, <4 x i32> %a) {
1222; CHECK-LE-LABEL: masked_v4f32_align1_undef:
1223; CHECK-LE:       @ %bb.0: @ %entry
1224; CHECK-LE-NEXT:    .pad #4
1225; CHECK-LE-NEXT:    sub sp, #4
1226; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
1227; CHECK-LE-NEXT:    @ implicit-def: $q0
1228; CHECK-LE-NEXT:    vmrs r2, p0
1229; CHECK-LE-NEXT:    and r1, r2, #1
1230; CHECK-LE-NEXT:    rsbs r3, r1, #0
1231; CHECK-LE-NEXT:    movs r1, #0
1232; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
1233; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
1234; CHECK-LE-NEXT:    rsbs r3, r3, #0
1235; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
1236; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
1237; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
1238; CHECK-LE-NEXT:    rsbs r3, r3, #0
1239; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
1240; CHECK-LE-NEXT:    rsbs r2, r2, #0
1241; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
1242; CHECK-LE-NEXT:    lsls r2, r1, #31
1243; CHECK-LE-NEXT:    itt ne
1244; CHECK-LE-NEXT:    ldrne r2, [r0]
1245; CHECK-LE-NEXT:    vmovne s0, r2
1246; CHECK-LE-NEXT:    lsls r2, r1, #30
1247; CHECK-LE-NEXT:    itt mi
1248; CHECK-LE-NEXT:    ldrmi r2, [r0, #4]
1249; CHECK-LE-NEXT:    vmovmi s1, r2
1250; CHECK-LE-NEXT:    lsls r2, r1, #29
1251; CHECK-LE-NEXT:    itt mi
1252; CHECK-LE-NEXT:    ldrmi r2, [r0, #8]
1253; CHECK-LE-NEXT:    vmovmi s2, r2
1254; CHECK-LE-NEXT:    lsls r1, r1, #28
1255; CHECK-LE-NEXT:    itt mi
1256; CHECK-LE-NEXT:    ldrmi r0, [r0, #12]
1257; CHECK-LE-NEXT:    vmovmi s3, r0
1258; CHECK-LE-NEXT:    add sp, #4
1259; CHECK-LE-NEXT:    bx lr
1260;
1261; CHECK-BE-LABEL: masked_v4f32_align1_undef:
1262; CHECK-BE:       @ %bb.0: @ %entry
1263; CHECK-BE-NEXT:    .pad #4
1264; CHECK-BE-NEXT:    sub sp, #4
1265; CHECK-BE-NEXT:    vrev64.32 q1, q0
1266; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
1267; CHECK-BE-NEXT:    @ implicit-def: $q1
1268; CHECK-BE-NEXT:    vmrs r2, p0
1269; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
1270; CHECK-BE-NEXT:    rsbs r3, r1, #0
1271; CHECK-BE-NEXT:    movs r1, #0
1272; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
1273; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
1274; CHECK-BE-NEXT:    rsbs r3, r3, #0
1275; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
1276; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
1277; CHECK-BE-NEXT:    and r2, r2, #1
1278; CHECK-BE-NEXT:    rsbs r3, r3, #0
1279; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
1280; CHECK-BE-NEXT:    rsbs r2, r2, #0
1281; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
1282; CHECK-BE-NEXT:    lsls r2, r1, #28
1283; CHECK-BE-NEXT:    itt mi
1284; CHECK-BE-NEXT:    ldrmi r2, [r0]
1285; CHECK-BE-NEXT:    vmovmi s4, r2
1286; CHECK-BE-NEXT:    lsls r2, r1, #29
1287; CHECK-BE-NEXT:    itt mi
1288; CHECK-BE-NEXT:    ldrmi r2, [r0, #4]
1289; CHECK-BE-NEXT:    vmovmi s5, r2
1290; CHECK-BE-NEXT:    lsls r2, r1, #30
1291; CHECK-BE-NEXT:    itt mi
1292; CHECK-BE-NEXT:    ldrmi r2, [r0, #8]
1293; CHECK-BE-NEXT:    vmovmi s6, r2
1294; CHECK-BE-NEXT:    lsls r1, r1, #31
1295; CHECK-BE-NEXT:    itt ne
1296; CHECK-BE-NEXT:    ldrne r0, [r0, #12]
1297; CHECK-BE-NEXT:    vmovne s7, r0
1298; CHECK-BE-NEXT:    vrev64.32 q0, q1
1299; CHECK-BE-NEXT:    add sp, #4
1300; CHECK-BE-NEXT:    bx lr
1301entry:
1302  %c = icmp sgt <4 x i32> %a, zeroinitializer
1303  %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x float> undef)
1304  ret <4 x float> %l
1305}
1306
; Masked <4 x float> load, alignment 4, with a separate passthru vector %b.
; The mask is (%a > 0, signed) on a <4 x i32> argument.
; Expected code: a predicated vldrwt.u32 followed by vpsel, which merges %b
; into the result. Big-endian reverses both %a and %b with vrev64.32 first and
; reverses the result afterwards.
1307define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_other(ptr %dest, <4 x i32> %a, <4 x float> %b) {
1308; CHECK-LE-LABEL: masked_v4f32_align4_other:
1309; CHECK-LE:       @ %bb.0: @ %entry
1310; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
1311; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
1312; CHECK-LE-NEXT:    vpsel q0, q0, q1
1313; CHECK-LE-NEXT:    bx lr
1314;
1315; CHECK-BE-LABEL: masked_v4f32_align4_other:
1316; CHECK-BE:       @ %bb.0: @ %entry
1317; CHECK-BE-NEXT:    vrev64.32 q2, q1
1318; CHECK-BE-NEXT:    vrev64.32 q1, q0
1319; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
1320; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
1321; CHECK-BE-NEXT:    vpsel q1, q0, q2
1322; CHECK-BE-NEXT:    vrev64.32 q0, q1
1323; CHECK-BE-NEXT:    bx lr
1324entry:
1325  %c = icmp sgt <4 x i32> %a, zeroinitializer
1326  %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> %b)
1327  ret <4 x float> %l
1328}
1329
; Pre-increment addressing: a masked <4 x float> load (alignment 4, undef
; passthru) from %x + 4 bytes, stored to %y, returning the incremented pointer.
; The mask is (%a > 0, signed). Expected code: the address update is folded
; into the predicated load as a pre-indexed writeback, vldrwt.u32 [r0, #4]!.
; Big-endian only adds a vrev64.32 on the mask input.
1330define arm_aapcs_vfpcc ptr @masked_v4f32_preinc(ptr %x, ptr %y, <4 x i32> %a) {
1331; CHECK-LE-LABEL: masked_v4f32_preinc:
1332; CHECK-LE:       @ %bb.0: @ %entry
1333; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
1334; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]!
1335; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1336; CHECK-LE-NEXT:    bx lr
1337;
1338; CHECK-BE-LABEL: masked_v4f32_preinc:
1339; CHECK-BE:       @ %bb.0: @ %entry
1340; CHECK-BE-NEXT:    vrev64.32 q1, q0
1341; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
1342; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]!
1343; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
1344; CHECK-BE-NEXT:    bx lr
1345entry:
1346  %z = getelementptr inbounds i8, ptr %x, i32 4
1347  %c = icmp sgt <4 x i32> %a, zeroinitializer
1348  %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef)
1349  store <4 x float> %0, ptr %y, align 4
1350  ret ptr %z
1351}
1352
; Post-increment addressing: a masked <4 x float> load (alignment 4, undef
; passthru) from %x, stored to %y, returning %x + 4 bytes.
; The mask is (%a > 0, signed). Expected code: the pointer increment is folded
; into the predicated load as a post-indexed access, vldrwt.u32 [r0], #4.
; Big-endian only adds a vrev64.32 on the mask input.
1353define arm_aapcs_vfpcc ptr @masked_v4f32_postinc(ptr %x, ptr %y, <4 x i32> %a) {
1354; CHECK-LE-LABEL: masked_v4f32_postinc:
1355; CHECK-LE:       @ %bb.0: @ %entry
1356; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
1357; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0], #4
1358; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1359; CHECK-LE-NEXT:    bx lr
1360;
1361; CHECK-BE-LABEL: masked_v4f32_postinc:
1362; CHECK-BE:       @ %bb.0: @ %entry
1363; CHECK-BE-NEXT:    vrev64.32 q1, q0
1364; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
1365; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0], #4
1366; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
1367; CHECK-BE-NEXT:    bx lr
1368entry:
1369  %z = getelementptr inbounds i8, ptr %x, i32 4
1370  %c = icmp sgt <4 x i32> %a, zeroinitializer
1371  %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %x, i32 4, <4 x i1> %c, <4 x float> undef)
1372  store <4 x float> %0, ptr %y, align 4
1373  ret ptr %z
1374}
1375
1376
; Masked <8 x half> load with a zero passthru. The mask comes from an integer
; compare (%a > 0, signed) on an <8 x i16> argument.
; Expected code: a VPT block containing one predicated vldrht.u16, no select.
; Big-endian adds vrev64.16 on the input and result.
; NOTE(review): the function name says "align4" but the intrinsic call passes
; alignment 2. Confirm which is intended.
1377define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_zero(ptr %dest, <8 x i16> %a) {
1378; CHECK-LE-LABEL: masked_v8f16_align4_zero:
1379; CHECK-LE:       @ %bb.0: @ %entry
1380; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
1381; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
1382; CHECK-LE-NEXT:    bx lr
1383;
1384; CHECK-BE-LABEL: masked_v8f16_align4_zero:
1385; CHECK-BE:       @ %bb.0: @ %entry
1386; CHECK-BE-NEXT:    vrev64.16 q1, q0
1387; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
1388; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
1389; CHECK-BE-NEXT:    vrev64.16 q0, q1
1390; CHECK-BE-NEXT:    bx lr
1391entry:
1392  %c = icmp sgt <8 x i16> %a, zeroinitializer
1393  %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> zeroinitializer)
1394  ret <8 x half> %l
1395}
1396
; Masked <8 x half> load with an undef passthru. The mask comes from an integer
; compare (%a > 0, signed) on an <8 x i16> argument.
; Expected code: a VPT block containing one predicated vldrht.u16, no select.
; Big-endian adds vrev64.16 on the input and result.
; NOTE(review): the function name says "align4" but the intrinsic call passes
; alignment 2. Confirm which is intended.
1397define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_undef(ptr %dest, <8 x i16> %a) {
1398; CHECK-LE-LABEL: masked_v8f16_align4_undef:
1399; CHECK-LE:       @ %bb.0: @ %entry
1400; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
1401; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
1402; CHECK-LE-NEXT:    bx lr
1403;
1404; CHECK-BE-LABEL: masked_v8f16_align4_undef:
1405; CHECK-BE:       @ %bb.0: @ %entry
1406; CHECK-BE-NEXT:    vrev64.16 q1, q0
1407; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
1408; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
1409; CHECK-BE-NEXT:    vrev64.16 q0, q1
1410; CHECK-BE-NEXT:    bx lr
1411entry:
1412  %c = icmp sgt <8 x i16> %a, zeroinitializer
1413  %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> undef)
1414  ret <8 x half> %l
1415}
1416
; Masked <8 x half> load with alignment 1 and an undef passthru; the mask is
; (%a > 0, signed). With this alignment the expected output contains no
; predicated vector load. The predicate is copied to a core register (vmrs
; from p0) and packed into an 8-bit lane mask with ubfx/rsbs/bfi. Each lane
; then has its own branch target (%cond.load*) that loads the half with ldrh,
; bounces it through a stack slot (strh.w then vldr.16), and inserts it:
; lane 0 lands directly in the destination s-register, odd lanes use vins.f16,
; and lanes 2/4/6 use vmov.16 into the q-register lane.
; Big-endian tests the mask bits in the opposite order and reverses the input
; and result with vrev64.16.
1417define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align1_undef(ptr %dest, <8 x i16> %a) {
1418; CHECK-LE-LABEL: masked_v8f16_align1_undef:
1419; CHECK-LE:       @ %bb.0: @ %entry
1420; CHECK-LE-NEXT:    .pad #36
1421; CHECK-LE-NEXT:    sub sp, #36
1422; CHECK-LE-NEXT:    vcmp.s16 gt, q0, zr
1423; CHECK-LE-NEXT:    @ implicit-def: $q0
1424; CHECK-LE-NEXT:    vmrs r1, p0
1425; CHECK-LE-NEXT:    and r2, r1, #1
1426; CHECK-LE-NEXT:    rsbs r3, r2, #0
1427; CHECK-LE-NEXT:    movs r2, #0
1428; CHECK-LE-NEXT:    bfi r2, r3, #0, #1
1429; CHECK-LE-NEXT:    ubfx r3, r1, #2, #1
1430; CHECK-LE-NEXT:    rsbs r3, r3, #0
1431; CHECK-LE-NEXT:    bfi r2, r3, #1, #1
1432; CHECK-LE-NEXT:    ubfx r3, r1, #4, #1
1433; CHECK-LE-NEXT:    rsbs r3, r3, #0
1434; CHECK-LE-NEXT:    bfi r2, r3, #2, #1
1435; CHECK-LE-NEXT:    ubfx r3, r1, #6, #1
1436; CHECK-LE-NEXT:    rsbs r3, r3, #0
1437; CHECK-LE-NEXT:    bfi r2, r3, #3, #1
1438; CHECK-LE-NEXT:    ubfx r3, r1, #8, #1
1439; CHECK-LE-NEXT:    rsbs r3, r3, #0
1440; CHECK-LE-NEXT:    bfi r2, r3, #4, #1
1441; CHECK-LE-NEXT:    ubfx r3, r1, #10, #1
1442; CHECK-LE-NEXT:    rsbs r3, r3, #0
1443; CHECK-LE-NEXT:    bfi r2, r3, #5, #1
1444; CHECK-LE-NEXT:    ubfx r3, r1, #12, #1
1445; CHECK-LE-NEXT:    ubfx r1, r1, #14, #1
1446; CHECK-LE-NEXT:    rsbs r3, r3, #0
1447; CHECK-LE-NEXT:    bfi r2, r3, #6, #1
1448; CHECK-LE-NEXT:    rsbs r1, r1, #0
1449; CHECK-LE-NEXT:    bfi r2, r1, #7, #1
1450; CHECK-LE-NEXT:    uxtb r1, r2
1451; CHECK-LE-NEXT:    lsls r2, r2, #31
1452; CHECK-LE-NEXT:    bne .LBB45_9
1453; CHECK-LE-NEXT:  @ %bb.1: @ %else
1454; CHECK-LE-NEXT:    lsls r2, r1, #30
1455; CHECK-LE-NEXT:    bmi .LBB45_10
1456; CHECK-LE-NEXT:  .LBB45_2: @ %else2
1457; CHECK-LE-NEXT:    lsls r2, r1, #29
1458; CHECK-LE-NEXT:    bmi .LBB45_11
1459; CHECK-LE-NEXT:  .LBB45_3: @ %else5
1460; CHECK-LE-NEXT:    lsls r2, r1, #28
1461; CHECK-LE-NEXT:    bmi .LBB45_12
1462; CHECK-LE-NEXT:  .LBB45_4: @ %else8
1463; CHECK-LE-NEXT:    lsls r2, r1, #27
1464; CHECK-LE-NEXT:    bmi .LBB45_13
1465; CHECK-LE-NEXT:  .LBB45_5: @ %else11
1466; CHECK-LE-NEXT:    lsls r2, r1, #26
1467; CHECK-LE-NEXT:    bmi .LBB45_14
1468; CHECK-LE-NEXT:  .LBB45_6: @ %else14
1469; CHECK-LE-NEXT:    lsls r2, r1, #25
1470; CHECK-LE-NEXT:    bmi .LBB45_15
1471; CHECK-LE-NEXT:  .LBB45_7: @ %else17
1472; CHECK-LE-NEXT:    lsls r1, r1, #24
1473; CHECK-LE-NEXT:    bmi .LBB45_16
1474; CHECK-LE-NEXT:  .LBB45_8: @ %else20
1475; CHECK-LE-NEXT:    add sp, #36
1476; CHECK-LE-NEXT:    bx lr
1477; CHECK-LE-NEXT:  .LBB45_9: @ %cond.load
1478; CHECK-LE-NEXT:    ldrh r2, [r0]
1479; CHECK-LE-NEXT:    strh.w r2, [sp, #28]
1480; CHECK-LE-NEXT:    vldr.16 s0, [sp, #28]
1481; CHECK-LE-NEXT:    lsls r2, r1, #30
1482; CHECK-LE-NEXT:    bpl .LBB45_2
1483; CHECK-LE-NEXT:  .LBB45_10: @ %cond.load1
1484; CHECK-LE-NEXT:    ldrh r2, [r0, #2]
1485; CHECK-LE-NEXT:    strh.w r2, [sp, #24]
1486; CHECK-LE-NEXT:    vldr.16 s4, [sp, #24]
1487; CHECK-LE-NEXT:    vins.f16 s0, s4
1488; CHECK-LE-NEXT:    lsls r2, r1, #29
1489; CHECK-LE-NEXT:    bpl .LBB45_3
1490; CHECK-LE-NEXT:  .LBB45_11: @ %cond.load4
1491; CHECK-LE-NEXT:    ldrh r2, [r0, #4]
1492; CHECK-LE-NEXT:    strh.w r2, [sp, #20]
1493; CHECK-LE-NEXT:    vldr.16 s4, [sp, #20]
1494; CHECK-LE-NEXT:    vmov r2, s4
1495; CHECK-LE-NEXT:    vmov.16 q0[2], r2
1496; CHECK-LE-NEXT:    lsls r2, r1, #28
1497; CHECK-LE-NEXT:    bpl .LBB45_4
1498; CHECK-LE-NEXT:  .LBB45_12: @ %cond.load7
1499; CHECK-LE-NEXT:    ldrh r2, [r0, #6]
1500; CHECK-LE-NEXT:    strh.w r2, [sp, #16]
1501; CHECK-LE-NEXT:    vldr.16 s4, [sp, #16]
1502; CHECK-LE-NEXT:    vins.f16 s1, s4
1503; CHECK-LE-NEXT:    lsls r2, r1, #27
1504; CHECK-LE-NEXT:    bpl .LBB45_5
1505; CHECK-LE-NEXT:  .LBB45_13: @ %cond.load10
1506; CHECK-LE-NEXT:    ldrh r2, [r0, #8]
1507; CHECK-LE-NEXT:    strh.w r2, [sp, #12]
1508; CHECK-LE-NEXT:    vldr.16 s4, [sp, #12]
1509; CHECK-LE-NEXT:    vmov r2, s4
1510; CHECK-LE-NEXT:    vmov.16 q0[4], r2
1511; CHECK-LE-NEXT:    lsls r2, r1, #26
1512; CHECK-LE-NEXT:    bpl .LBB45_6
1513; CHECK-LE-NEXT:  .LBB45_14: @ %cond.load13
1514; CHECK-LE-NEXT:    ldrh r2, [r0, #10]
1515; CHECK-LE-NEXT:    strh.w r2, [sp, #8]
1516; CHECK-LE-NEXT:    vldr.16 s4, [sp, #8]
1517; CHECK-LE-NEXT:    vins.f16 s2, s4
1518; CHECK-LE-NEXT:    lsls r2, r1, #25
1519; CHECK-LE-NEXT:    bpl .LBB45_7
1520; CHECK-LE-NEXT:  .LBB45_15: @ %cond.load16
1521; CHECK-LE-NEXT:    ldrh r2, [r0, #12]
1522; CHECK-LE-NEXT:    strh.w r2, [sp, #4]
1523; CHECK-LE-NEXT:    vldr.16 s4, [sp, #4]
1524; CHECK-LE-NEXT:    vmov r2, s4
1525; CHECK-LE-NEXT:    vmov.16 q0[6], r2
1526; CHECK-LE-NEXT:    lsls r1, r1, #24
1527; CHECK-LE-NEXT:    bpl .LBB45_8
1528; CHECK-LE-NEXT:  .LBB45_16: @ %cond.load19
1529; CHECK-LE-NEXT:    ldrh r0, [r0, #14]
1530; CHECK-LE-NEXT:    strh.w r0, [sp]
1531; CHECK-LE-NEXT:    vldr.16 s4, [sp]
1532; CHECK-LE-NEXT:    vins.f16 s3, s4
1533; CHECK-LE-NEXT:    add sp, #36
1534; CHECK-LE-NEXT:    bx lr
1535;
1536; CHECK-BE-LABEL: masked_v8f16_align1_undef:
1537; CHECK-BE:       @ %bb.0: @ %entry
1538; CHECK-BE-NEXT:    .pad #36
1539; CHECK-BE-NEXT:    sub sp, #36
1540; CHECK-BE-NEXT:    vrev64.16 q1, q0
1541; CHECK-BE-NEXT:    vcmp.s16 gt, q1, zr
1542; CHECK-BE-NEXT:    @ implicit-def: $q1
1543; CHECK-BE-NEXT:    vmrs r1, p0
1544; CHECK-BE-NEXT:    ubfx r2, r1, #14, #1
1545; CHECK-BE-NEXT:    rsbs r3, r2, #0
1546; CHECK-BE-NEXT:    movs r2, #0
1547; CHECK-BE-NEXT:    bfi r2, r3, #0, #1
1548; CHECK-BE-NEXT:    ubfx r3, r1, #12, #1
1549; CHECK-BE-NEXT:    rsbs r3, r3, #0
1550; CHECK-BE-NEXT:    bfi r2, r3, #1, #1
1551; CHECK-BE-NEXT:    ubfx r3, r1, #10, #1
1552; CHECK-BE-NEXT:    rsbs r3, r3, #0
1553; CHECK-BE-NEXT:    bfi r2, r3, #2, #1
1554; CHECK-BE-NEXT:    ubfx r3, r1, #8, #1
1555; CHECK-BE-NEXT:    rsbs r3, r3, #0
1556; CHECK-BE-NEXT:    bfi r2, r3, #3, #1
1557; CHECK-BE-NEXT:    ubfx r3, r1, #6, #1
1558; CHECK-BE-NEXT:    rsbs r3, r3, #0
1559; CHECK-BE-NEXT:    bfi r2, r3, #4, #1
1560; CHECK-BE-NEXT:    ubfx r3, r1, #4, #1
1561; CHECK-BE-NEXT:    rsbs r3, r3, #0
1562; CHECK-BE-NEXT:    bfi r2, r3, #5, #1
1563; CHECK-BE-NEXT:    ubfx r3, r1, #2, #1
1564; CHECK-BE-NEXT:    and r1, r1, #1
1565; CHECK-BE-NEXT:    rsbs r3, r3, #0
1566; CHECK-BE-NEXT:    bfi r2, r3, #6, #1
1567; CHECK-BE-NEXT:    rsbs r1, r1, #0
1568; CHECK-BE-NEXT:    bfi r2, r1, #7, #1
1569; CHECK-BE-NEXT:    uxtb r1, r2
1570; CHECK-BE-NEXT:    lsls r2, r2, #24
1571; CHECK-BE-NEXT:    bmi .LBB45_10
1572; CHECK-BE-NEXT:  @ %bb.1: @ %else
1573; CHECK-BE-NEXT:    lsls r2, r1, #25
1574; CHECK-BE-NEXT:    bmi .LBB45_11
1575; CHECK-BE-NEXT:  .LBB45_2: @ %else2
1576; CHECK-BE-NEXT:    lsls r2, r1, #26
1577; CHECK-BE-NEXT:    bmi .LBB45_12
1578; CHECK-BE-NEXT:  .LBB45_3: @ %else5
1579; CHECK-BE-NEXT:    lsls r2, r1, #27
1580; CHECK-BE-NEXT:    bmi .LBB45_13
1581; CHECK-BE-NEXT:  .LBB45_4: @ %else8
1582; CHECK-BE-NEXT:    lsls r2, r1, #28
1583; CHECK-BE-NEXT:    bmi .LBB45_14
1584; CHECK-BE-NEXT:  .LBB45_5: @ %else11
1585; CHECK-BE-NEXT:    lsls r2, r1, #29
1586; CHECK-BE-NEXT:    bmi .LBB45_15
1587; CHECK-BE-NEXT:  .LBB45_6: @ %else14
1588; CHECK-BE-NEXT:    lsls r2, r1, #30
1589; CHECK-BE-NEXT:    bmi .LBB45_16
1590; CHECK-BE-NEXT:  .LBB45_7: @ %else17
1591; CHECK-BE-NEXT:    lsls r1, r1, #31
1592; CHECK-BE-NEXT:    beq .LBB45_9
1593; CHECK-BE-NEXT:  .LBB45_8: @ %cond.load19
1594; CHECK-BE-NEXT:    ldrh r0, [r0, #14]
1595; CHECK-BE-NEXT:    strh.w r0, [sp]
1596; CHECK-BE-NEXT:    vldr.16 s0, [sp]
1597; CHECK-BE-NEXT:    vins.f16 s7, s0
1598; CHECK-BE-NEXT:  .LBB45_9: @ %else20
1599; CHECK-BE-NEXT:    vrev64.16 q0, q1
1600; CHECK-BE-NEXT:    add sp, #36
1601; CHECK-BE-NEXT:    bx lr
1602; CHECK-BE-NEXT:  .LBB45_10: @ %cond.load
1603; CHECK-BE-NEXT:    ldrh r2, [r0]
1604; CHECK-BE-NEXT:    strh.w r2, [sp, #28]
1605; CHECK-BE-NEXT:    vldr.16 s4, [sp, #28]
1606; CHECK-BE-NEXT:    lsls r2, r1, #25
1607; CHECK-BE-NEXT:    bpl .LBB45_2
1608; CHECK-BE-NEXT:  .LBB45_11: @ %cond.load1
1609; CHECK-BE-NEXT:    ldrh r2, [r0, #2]
1610; CHECK-BE-NEXT:    strh.w r2, [sp, #24]
1611; CHECK-BE-NEXT:    vldr.16 s0, [sp, #24]
1612; CHECK-BE-NEXT:    vins.f16 s4, s0
1613; CHECK-BE-NEXT:    lsls r2, r1, #26
1614; CHECK-BE-NEXT:    bpl .LBB45_3
1615; CHECK-BE-NEXT:  .LBB45_12: @ %cond.load4
1616; CHECK-BE-NEXT:    ldrh r2, [r0, #4]
1617; CHECK-BE-NEXT:    strh.w r2, [sp, #20]
1618; CHECK-BE-NEXT:    vldr.16 s0, [sp, #20]
1619; CHECK-BE-NEXT:    vmov r2, s0
1620; CHECK-BE-NEXT:    vmov.16 q1[2], r2
1621; CHECK-BE-NEXT:    lsls r2, r1, #27
1622; CHECK-BE-NEXT:    bpl .LBB45_4
1623; CHECK-BE-NEXT:  .LBB45_13: @ %cond.load7
1624; CHECK-BE-NEXT:    ldrh r2, [r0, #6]
1625; CHECK-BE-NEXT:    strh.w r2, [sp, #16]
1626; CHECK-BE-NEXT:    vldr.16 s0, [sp, #16]
1627; CHECK-BE-NEXT:    vins.f16 s5, s0
1628; CHECK-BE-NEXT:    lsls r2, r1, #28
1629; CHECK-BE-NEXT:    bpl .LBB45_5
1630; CHECK-BE-NEXT:  .LBB45_14: @ %cond.load10
1631; CHECK-BE-NEXT:    ldrh r2, [r0, #8]
1632; CHECK-BE-NEXT:    strh.w r2, [sp, #12]
1633; CHECK-BE-NEXT:    vldr.16 s0, [sp, #12]
1634; CHECK-BE-NEXT:    vmov r2, s0
1635; CHECK-BE-NEXT:    vmov.16 q1[4], r2
1636; CHECK-BE-NEXT:    lsls r2, r1, #29
1637; CHECK-BE-NEXT:    bpl .LBB45_6
1638; CHECK-BE-NEXT:  .LBB45_15: @ %cond.load13
1639; CHECK-BE-NEXT:    ldrh r2, [r0, #10]
1640; CHECK-BE-NEXT:    strh.w r2, [sp, #8]
1641; CHECK-BE-NEXT:    vldr.16 s0, [sp, #8]
1642; CHECK-BE-NEXT:    vins.f16 s6, s0
1643; CHECK-BE-NEXT:    lsls r2, r1, #30
1644; CHECK-BE-NEXT:    bpl .LBB45_7
1645; CHECK-BE-NEXT:  .LBB45_16: @ %cond.load16
1646; CHECK-BE-NEXT:    ldrh r2, [r0, #12]
1647; CHECK-BE-NEXT:    strh.w r2, [sp, #4]
1648; CHECK-BE-NEXT:    vldr.16 s0, [sp, #4]
1649; CHECK-BE-NEXT:    vmov r2, s0
1650; CHECK-BE-NEXT:    vmov.16 q1[6], r2
1651; CHECK-BE-NEXT:    lsls r1, r1, #31
1652; CHECK-BE-NEXT:    bne .LBB45_8
1653; CHECK-BE-NEXT:    b .LBB45_9
1654entry:
1655  %c = icmp sgt <8 x i16> %a, zeroinitializer
1656  %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x half> undef)
1657  ret <8 x half> %l
1658}
1659
; Masked <8 x half> load whose passthru is the live argument %b. The mask is
; (%a > 0) per lane. Expected code is a predicated vldrht.u16 followed by a
; vpsel against the passthru register; the big-endian run also expects
; vrev64.16 on both vector inputs and on the result.
; NOTE(review): the name says align4 but the alignment operand below is 2;
; confirm which one is intended.
define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_other(ptr %dest, <8 x i16> %a, <8 x half> %b) {
; CHECK-LE-LABEL: masked_v8f16_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q2, q1
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q2
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> %b)
  ret <8 x half> %l
}
1682
; Masked <8 x half> load from %x + 4 bytes, where the incremented pointer is
; also the function's return value. Expected code folds the offset into a
; pre-indexed predicated load (vldrht.u16 q0, [r0, #4]!) and then stores the
; loaded vector to %y.
define arm_aapcs_vfpcc ptr @masked_v8f16_preinc(ptr %x, ptr %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0, #4]!
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0, #4]!
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 4, <8 x i1> %c, <8 x half> undef)
  store <8 x half> %0, ptr %y, align 4
  ret ptr %z
}
1705
; Masked <8 x half> load from %x itself, while the function returns %x + 4
; bytes. Expected code folds the pointer increment into a post-indexed
; predicated load (vldrht.u16 q0, [r0], #4) and then stores the loaded vector
; to %y.
define arm_aapcs_vfpcc ptr @masked_v8f16_postinc(ptr %x, ptr %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0], #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0], #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %x, i32 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %x, i32 4, <8 x i1> %c, <8 x half> undef)
  store <8 x half> %0, ptr %y, align 4
  ret ptr %z
}
1728
1729
; Masked <2 x i64> load with a zero passthru; the mask is (%a > 0) per lane.
; The expected code contains no predicated vector load: a two-bit mask is
; assembled in r1 with bfi, and each lane is loaded conditionally with a
; scalar-side vldr of one d-register.
; NOTE(review): the name says align4 but the alignment operand below is 8;
; confirm which one is intended.
define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(ptr %dest, <2 x i64> %a) {
; CHECK-LE-LABEL: masked_v2i64_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r2, r3, d0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmov r12, lr, d1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    beq .LBB49_2
; CHECK-LE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-LE-NEXT:    vldr d1, .LCPI49_0
; CHECK-LE-NEXT:    vldr d0, [r0]
; CHECK-LE-NEXT:    b .LBB49_3
; CHECK-LE-NEXT:  .LBB49_2:
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:  .LBB49_3: @ %else
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    pop {r7, pc}
; CHECK-LE-NEXT:    .p2align 3
; CHECK-LE-NEXT:  @ %bb.4:
; CHECK-LE-NEXT:  .LCPI49_0:
; CHECK-LE-NEXT:    .long 0 @ double 0
; CHECK-LE-NEXT:    .long 0
;
; CHECK-BE-LABEL: masked_v2i64_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vmov r2, r3, d3
; CHECK-BE-NEXT:    vmov r12, lr, d2
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB49_2
; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-BE-NEXT:    vldr d1, .LCPI49_0
; CHECK-BE-NEXT:    vldr d0, [r0]
; CHECK-BE-NEXT:    b .LBB49_3
; CHECK-BE-NEXT:  .LBB49_2:
; CHECK-BE-NEXT:    vmov.i32 q0, #0x0
; CHECK-BE-NEXT:  .LBB49_3: @ %else
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    vldrne d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    pop {r7, pc}
; CHECK-BE-NEXT:    .p2align 3
; CHECK-BE-NEXT:  @ %bb.4:
; CHECK-BE-NEXT:  .LCPI49_0:
; CHECK-BE-NEXT:    .long 0 @ double 0
; CHECK-BE-NEXT:    .long 0
entry:
  %c = icmp sgt <2 x i64> %a, zeroinitializer
  %l = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %dest, i32 8, <2 x i1> %c, <2 x i64> zeroinitializer)
  ret <2 x i64> %l
}
1810
; Masked <2 x double> load with a zero passthru. The mask is (%b > 0) per
; lane; %a is not referenced in the body. As in the <2 x i64> case, the
; expected code builds a two-bit mask in r1 with bfi and loads each lane
; conditionally with vldr of a single d-register.
; NOTE(review): the name says align4 but the alignment operand below is 8;
; confirm which one is intended.
define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(ptr %dest, <2 x double> %a, <2 x i64> %b) {
; CHECK-LE-LABEL: masked_v2f64_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r2, r3, d2
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmov r12, lr, d3
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    beq .LBB50_2
; CHECK-LE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-LE-NEXT:    vldr d1, .LCPI50_0
; CHECK-LE-NEXT:    vldr d0, [r0]
; CHECK-LE-NEXT:    b .LBB50_3
; CHECK-LE-NEXT:  .LBB50_2:
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:  .LBB50_3: @ %else
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    pop {r7, pc}
; CHECK-LE-NEXT:    .p2align 3
; CHECK-LE-NEXT:  @ %bb.4:
; CHECK-LE-NEXT:  .LCPI50_0:
; CHECK-LE-NEXT:    .long 0 @ double 0
; CHECK-LE-NEXT:    .long 0
;
; CHECK-BE-LABEL: masked_v2f64_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vmov r2, r3, d1
; CHECK-BE-NEXT:    vmov r12, lr, d0
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB50_2
; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-BE-NEXT:    vldr d1, .LCPI50_0
; CHECK-BE-NEXT:    vldr d0, [r0]
; CHECK-BE-NEXT:    b .LBB50_3
; CHECK-BE-NEXT:  .LBB50_2:
; CHECK-BE-NEXT:    vmov.i32 q0, #0x0
; CHECK-BE-NEXT:  .LBB50_3: @ %else
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    vldrne d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    pop {r7, pc}
; CHECK-BE-NEXT:    .p2align 3
; CHECK-BE-NEXT:  @ %bb.4:
; CHECK-BE-NEXT:  .LCPI50_0:
; CHECK-BE-NEXT:    .long 0 @ double 0
; CHECK-BE-NEXT:    .long 0
entry:
  %c = icmp sgt <2 x i64> %b, zeroinitializer
  %l = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %dest, i32 8, <2 x i1> %c, <2 x double> zeroinitializer)
  ret <2 x double> %l
}
1891
; Masked <4 x i16> load (alignment operand 2, zero passthru) returned directly
; as a <4 x i16> value. The mask is (%a > 0) on <4 x i32> lanes. Expected code
; is a single predicated vldrht.u32; the big-endian run wraps it in vrev64.32.
define arm_aapcs_vfpcc <4 x i16> @anyext_v4i16(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: anyext_v4i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: anyext_v4i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
  ret <4 x i16> %l
}
1911
; Same <4 x i16> masked load as anyext_v4i16 but with alignment operand 1.
; The expected code contains no predicated vector load: the predicate is read
; out of p0 with vmrs, repacked one bit per lane into r1, and each lane is
; loaded conditionally with ldrh and inserted with vmov.32.
define arm_aapcs_vfpcc <4 x i16> @anyext_v4i16_align1(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: anyext_v4i16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT:    mov.w r12, #0
; CHECK-LE-NEXT:    vmrs r3, p0
; CHECK-LE-NEXT:    and r1, r3, #1
; CHECK-LE-NEXT:    rsbs r2, r1, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    ubfx r2, r3, #4, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    ubfx r2, r3, #8, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
; CHECK-LE-NEXT:    ubfx r2, r3, #12, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    beq .LBB52_2
; CHECK-LE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-LE-NEXT:    ldrh r2, [r0]
; CHECK-LE-NEXT:    vdup.32 q0, r12
; CHECK-LE-NEXT:    vmov.32 q0[0], r2
; CHECK-LE-NEXT:    b .LBB52_3
; CHECK-LE-NEXT:  .LBB52_2:
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:  .LBB52_3: @ %else
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r2, [r0, #2]
; CHECK-LE-NEXT:    vmovmi.32 q0[1], r2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r2, [r0, #4]
; CHECK-LE-NEXT:    vmovmi.32 q0[2], r2
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r0, [r0, #6]
; CHECK-LE-NEXT:    vmovmi.32 q0[3], r0
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: anyext_v4i16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    mov.w r12, #0
; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT:    vmrs r3, p0
; CHECK-BE-NEXT:    ubfx r1, r3, #12, #1
; CHECK-BE-NEXT:    rsbs r2, r1, #0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    ubfx r2, r3, #8, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    ubfx r2, r3, #4, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
; CHECK-BE-NEXT:    and r2, r3, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bpl .LBB52_2
; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-BE-NEXT:    ldrh r2, [r0]
; CHECK-BE-NEXT:    vdup.32 q1, r12
; CHECK-BE-NEXT:    vmov.32 q1[0], r2
; CHECK-BE-NEXT:    b .LBB52_3
; CHECK-BE-NEXT:  .LBB52_2:
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:  .LBB52_3: @ %else
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    ldrhmi r2, [r0, #2]
; CHECK-BE-NEXT:    vmovmi.32 q1[1], r2
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    ldrhmi r2, [r0, #4]
; CHECK-BE-NEXT:    vmovmi.32 q1[2], r2
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    ldrhne r0, [r0, #6]
; CHECK-BE-NEXT:    vmovne.32 q1[3], r0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> zeroinitializer)
  ret <4 x i16> %l
}
2009
; Masked <4 x i8> load (alignment operand 1, zero passthru) returned directly
; as a <4 x i8> value. The mask is (%a > 0) on <4 x i32> lanes. Expected code
; is a single predicated vldrbt.u32; the big-endian run wraps it in vrev64.32.
define arm_aapcs_vfpcc <4 x i8> @anyext_v4i8(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: anyext_v4i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: anyext_v4i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer)
  ret <4 x i8> %l
}
2029
; Masked <8 x i8> load (alignment operand 1, zero passthru) returned directly
; as a <8 x i8> value. The mask is (%a > 0) on <8 x i16> lanes. Expected code
; is a single predicated vldrbt.u16; the big-endian run wraps it in vrev64.16.
define arm_aapcs_vfpcc <8 x i8> @anyext_v8i8(ptr %dest, <8 x i16> %a) {
; CHECK-LE-LABEL: anyext_v8i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: anyext_v8i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u16 q1, [r0]
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer)
  ret <8 x i8> %l
}
2049
; The masked <4 x i16> load result has two users: it is passed to @foo and it
; is zero-extended to <4 x i32> for the return value. Expected code performs
; one predicated vldrht.u32, keeps the value in q4 across the call, and
; returns it with a plain vmov; no separate extend instruction is expected.
define arm_aapcs_vfpcc <4 x i32> @multi_user_zext(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: multi_user_zext:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u32 q4, [r0]
; CHECK-LE-NEXT:    vmov r0, r1, d8
; CHECK-LE-NEXT:    vmov r2, r3, d9
; CHECK-LE-NEXT:    bl foo
; CHECK-LE-NEXT:    vmov q0, q4
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: multi_user_zext:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q0, [r0]
; CHECK-BE-NEXT:    vrev64.32 q4, q0
; CHECK-BE-NEXT:    vmov r1, r0, d8
; CHECK-BE-NEXT:    vmov r3, r2, d9
; CHECK-BE-NEXT:    bl foo
; CHECK-BE-NEXT:    vmov q0, q4
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
  call void @foo(<4 x i16> %l)
  %ext = zext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}
2089
; The masked <4 x i16> load result has two users: it is passed to @foo and it
; is sign-extended to <4 x i32> for the return value. Expected code performs
; one predicated vldrht.u32, keeps the value in q4 across the call, and
; produces the return value with an explicit vmovlb.s16 afterwards.
define arm_aapcs_vfpcc <4 x i32> @multi_user_sext(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: multi_user_sext:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u32 q4, [r0]
; CHECK-LE-NEXT:    vmov r0, r1, d8
; CHECK-LE-NEXT:    vmov r2, r3, d9
; CHECK-LE-NEXT:    bl foo
; CHECK-LE-NEXT:    vmovlb.s16 q0, q4
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: multi_user_sext:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q4, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q4
; CHECK-BE-NEXT:    vmov r1, r0, d0
; CHECK-BE-NEXT:    vmov r3, r2, d1
; CHECK-BE-NEXT:    bl foo
; CHECK-BE-NEXT:    vmovlb.s16 q1, q4
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
  call void @foo(<4 x i16> %l)
  %ext = sext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}
2130
; Declarations for the llvm.masked.load overloads (operands: pointer, i32
; alignment, per-lane i1 mask, passthru vector) and for @foo, the external
; callee used by the multi_user_* tests.
declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>)
declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
declare void @foo(<4 x i16>)
2142