xref: /llvm-project/llvm/test/CodeGen/ARM/cttz_vector.ll (revision e36caaeeb25fafba0851e4a1905c7ceb08c337a8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s
3
4; This test checks the @llvm.cttz.* intrinsics for vectors.
5
; Declarations of the vector count-trailing-zeros intrinsic for every vector
; shape exercised in this file: element widths i8/i16/i32/i64, from a single
; element up to a full 128-bit vector. The second (i1) operand is passed as
; `false` by the first group of tests and as `true` by the *_zero_undef group.
; i8 elements
6declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
7declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
8declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
9declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
10declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
11
; i16 elements
12declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
13declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
14declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
15declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
16
; i32 elements
17declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
18declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
19declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
20
; i64 elements
21declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
22declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
23
24;------------------------------------------------------------------------------
25
; cttz of a <1 x i8> loaded from %p and stored back in place; i1 false, so a
; zero input must still produce a defined result.
; The expected assembly is scalar: `orr #256` sets bit 8 above the loaded
; byte, so rbit + clz yields 8 when the byte itself is zero.
26define void @test_v1i8(ptr %p) {
27; CHECK-LABEL: test_v1i8:
28; CHECK:       @ %bb.0:
29; CHECK-NEXT:    ldrb r1, [r0]
30; CHECK-NEXT:    orr r1, r1, #256
31; CHECK-NEXT:    rbit r1, r1
32; CHECK-NEXT:    clz r1, r1
33; CHECK-NEXT:    strb r1, [r0]
34; CHECK-NEXT:    bx lr
35  %a = load <1 x i8>, ptr %p
36  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
37  store <1 x i8> %tmp, ptr %p
38  ret void
39}
40
; cttz of a <2 x i8> loaded from %p and stored back in place; i1 false (zero
; input defined).
; The expected assembly widens the bytes to 32-bit lanes (two vmovl steps),
; ORs in 0x100 so a zero byte yields 8, isolates the lowest set bit with
; x & -x (vneg + vand), and computes 31 - clz. Each result lane is moved to a
; core register and stored with its own strb.
41define void @test_v2i8(ptr %p) {
42; CHECK-LABEL: test_v2i8:
43; CHECK:       @ %bb.0:
44; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
45; CHECK-NEXT:    vmovl.u8 q8, d16
46; CHECK-NEXT:    vmovl.u16 q8, d16
47; CHECK-NEXT:    vorr.i32 d16, #0x100
48; CHECK-NEXT:    vneg.s32 d18, d16
49; CHECK-NEXT:    vand d16, d16, d18
50; CHECK-NEXT:    vmov.i32 d17, #0x1f
51; CHECK-NEXT:    vclz.i32 d16, d16
52; CHECK-NEXT:    vsub.i32 d16, d17, d16
53; CHECK-NEXT:    vmov.32 r1, d16[1]
54; CHECK-NEXT:    vmov.32 r2, d16[0]
55; CHECK-NEXT:    strb r1, [r0, #1]
56; CHECK-NEXT:    strb r2, [r0]
57; CHECK-NEXT:    bx lr
58  %a = load <2 x i8>, ptr %p
59  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
60  store <2 x i8> %tmp, ptr %p
61  ret void
62}
63
; cttz of a <4 x i8> loaded from %p and stored back in place; i1 false (zero
; input defined).
; The expected assembly widens the bytes to 16-bit lanes, ORs in 0x100 so a
; zero byte yields 8, isolates the lowest set bit with x & -x, and computes
; 15 - clz. vuzp.8 then gathers the low byte of each lane ahead of the single
; 32-bit lane store.
64define void @test_v4i8(ptr %p) {
65; CHECK-LABEL: test_v4i8:
66; CHECK:       @ %bb.0:
67; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
68; CHECK-NEXT:    vmovl.u8 q8, d16
69; CHECK-NEXT:    vorr.i16 d16, #0x100
70; CHECK-NEXT:    vneg.s16 d18, d16
71; CHECK-NEXT:    vand d16, d16, d18
72; CHECK-NEXT:    vmov.i16 d17, #0xf
73; CHECK-NEXT:    vclz.i16 d16, d16
74; CHECK-NEXT:    vsub.i16 d16, d17, d16
75; CHECK-NEXT:    vuzp.8 d16, d17
76; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
77; CHECK-NEXT:    bx lr
78  %a = load <4 x i8>, ptr %p
79  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
80  store <4 x i8> %tmp, ptr %p
81  ret void
82}
83
; cttz of a <8 x i8> (one 64-bit d register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly computes popcount((x & -x) - 1) per byte with vcnt.8.
; For a zero byte this is popcount(0xff) = 8, so no separate zero handling
; appears in the sequence.
84define void @test_v8i8(ptr %p) {
85; CHECK-LABEL: test_v8i8:
86; CHECK:       @ %bb.0:
87; CHECK-NEXT:    vldr d16, [r0]
88; CHECK-NEXT:    vmov.i8 d18, #0x1
89; CHECK-NEXT:    vneg.s8 d17, d16
90; CHECK-NEXT:    vand d16, d16, d17
91; CHECK-NEXT:    vsub.i8 d16, d16, d18
92; CHECK-NEXT:    vcnt.8 d16, d16
93; CHECK-NEXT:    vstr d16, [r0]
94; CHECK-NEXT:    bx lr
95  %a = load <8 x i8>, ptr %p
96  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
97  store <8 x i8> %tmp, ptr %p
98  ret void
99}
100
; cttz of a <16 x i8> (one 128-bit q register) loaded from %p and stored back
; in place; i1 false (zero input defined).
; The expected assembly is the q-register form of popcount((x & -x) - 1) per
; byte using vcnt.8.
101define void @test_v16i8(ptr %p) {
102; CHECK-LABEL: test_v16i8:
103; CHECK:       @ %bb.0:
104; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
105; CHECK-NEXT:    vmov.i8 q10, #0x1
106; CHECK-NEXT:    vneg.s8 q9, q8
107; CHECK-NEXT:    vand q8, q8, q9
108; CHECK-NEXT:    vsub.i8 q8, q8, q10
109; CHECK-NEXT:    vcnt.8 q8, q8
110; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
111; CHECK-NEXT:    bx lr
112  %a = load <16 x i8>, ptr %p
113  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
114  store <16 x i8> %tmp, ptr %p
115  ret void
116}
117
; cttz of a <1 x i16> loaded from %p and stored back in place; i1 false (zero
; input defined).
; The expected assembly is scalar: `orr #65536` sets bit 16 above the loaded
; halfword, so rbit + clz yields 16 when the halfword itself is zero.
118define void @test_v1i16(ptr %p) {
119; CHECK-LABEL: test_v1i16:
120; CHECK:       @ %bb.0:
121; CHECK-NEXT:    ldrh r1, [r0]
122; CHECK-NEXT:    orr r1, r1, #65536
123; CHECK-NEXT:    rbit r1, r1
124; CHECK-NEXT:    clz r1, r1
125; CHECK-NEXT:    strh r1, [r0]
126; CHECK-NEXT:    bx lr
127  %a = load <1 x i16>, ptr %p
128  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
129  store <1 x i16> %tmp, ptr %p
130  ret void
131}
132
; cttz of a <2 x i16> loaded from %p and stored back in place; i1 false (zero
; input defined).
; The expected assembly widens the halfwords to 32-bit lanes, ORs in 0x10000
; so a zero halfword yields 16, isolates the lowest set bit with x & -x, and
; computes 31 - clz. vuzp.16 then gathers the low halfword of each lane ahead
; of the single 32-bit lane store.
133define void @test_v2i16(ptr %p) {
134; CHECK-LABEL: test_v2i16:
135; CHECK:       @ %bb.0:
136; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
137; CHECK-NEXT:    vmovl.u16 q8, d16
138; CHECK-NEXT:    vorr.i32 d16, #0x10000
139; CHECK-NEXT:    vneg.s32 d18, d16
140; CHECK-NEXT:    vand d16, d16, d18
141; CHECK-NEXT:    vmov.i32 d17, #0x1f
142; CHECK-NEXT:    vclz.i32 d16, d16
143; CHECK-NEXT:    vsub.i32 d16, d17, d16
144; CHECK-NEXT:    vuzp.16 d16, d17
145; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
146; CHECK-NEXT:    bx lr
147  %a = load <2 x i16>, ptr %p
148  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
149  store <2 x i16> %tmp, ptr %p
150  ret void
151}
152
; cttz of a <4 x i16> (one d register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly computes popcount((x & -x) - 1) per 16-bit lane:
; vcnt.8 counts bits per byte and vpaddl.u8 sums adjacent byte counts into
; 16-bit lanes.
153define void @test_v4i16(ptr %p) {
154; CHECK-LABEL: test_v4i16:
155; CHECK:       @ %bb.0:
156; CHECK-NEXT:    vldr d16, [r0]
157; CHECK-NEXT:    vmov.i16 d18, #0x1
158; CHECK-NEXT:    vneg.s16 d17, d16
159; CHECK-NEXT:    vand d16, d16, d17
160; CHECK-NEXT:    vsub.i16 d16, d16, d18
161; CHECK-NEXT:    vcnt.8 d16, d16
162; CHECK-NEXT:    vpaddl.u8 d16, d16
163; CHECK-NEXT:    vstr d16, [r0]
164; CHECK-NEXT:    bx lr
165  %a = load <4 x i16>, ptr %p
166  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
167  store <4 x i16> %tmp, ptr %p
168  ret void
169}
170
; cttz of a <8 x i16> (one q register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly is the q-register form of popcount((x & -x) - 1):
; vcnt.8 per byte, then vpaddl.u8 to sum byte counts into 16-bit lanes.
171define void @test_v8i16(ptr %p) {
172; CHECK-LABEL: test_v8i16:
173; CHECK:       @ %bb.0:
174; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
175; CHECK-NEXT:    vmov.i16 q10, #0x1
176; CHECK-NEXT:    vneg.s16 q9, q8
177; CHECK-NEXT:    vand q8, q8, q9
178; CHECK-NEXT:    vsub.i16 q8, q8, q10
179; CHECK-NEXT:    vcnt.8 q8, q8
180; CHECK-NEXT:    vpaddl.u8 q8, q8
181; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
182; CHECK-NEXT:    bx lr
183  %a = load <8 x i16>, ptr %p
184  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
185  store <8 x i16> %tmp, ptr %p
186  ret void
187}
188
; cttz of a <1 x i32> loaded from %p and stored back in place; i1 false (zero
; input defined).
; The expected assembly is a plain scalar rbit + clz on the loaded word; unlike
; the narrower single-element tests, no extra bit is ORed in first.
189define void @test_v1i32(ptr %p) {
190; CHECK-LABEL: test_v1i32:
191; CHECK:       @ %bb.0:
192; CHECK-NEXT:    ldr r1, [r0]
193; CHECK-NEXT:    rbit r1, r1
194; CHECK-NEXT:    clz r1, r1
195; CHECK-NEXT:    str r1, [r0]
196; CHECK-NEXT:    bx lr
197  %a = load <1 x i32>, ptr %p
198  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
199  store <1 x i32> %tmp, ptr %p
200  ret void
201}
202
; cttz of a <2 x i32> (one d register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly computes popcount((x & -x) - 1) per 32-bit lane:
; vcnt.8 per byte, then vpaddl.u8 and vpaddl.u16 to sum the byte counts up to
; 32-bit lanes.
203define void @test_v2i32(ptr %p) {
204; CHECK-LABEL: test_v2i32:
205; CHECK:       @ %bb.0:
206; CHECK-NEXT:    vldr d16, [r0]
207; CHECK-NEXT:    vmov.i32 d18, #0x1
208; CHECK-NEXT:    vneg.s32 d17, d16
209; CHECK-NEXT:    vand d16, d16, d17
210; CHECK-NEXT:    vsub.i32 d16, d16, d18
211; CHECK-NEXT:    vcnt.8 d16, d16
212; CHECK-NEXT:    vpaddl.u8 d16, d16
213; CHECK-NEXT:    vpaddl.u16 d16, d16
214; CHECK-NEXT:    vstr d16, [r0]
215; CHECK-NEXT:    bx lr
216  %a = load <2 x i32>, ptr %p
217  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
218  store <2 x i32> %tmp, ptr %p
219  ret void
220}
221
; cttz of a <4 x i32> (one q register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly is the q-register form of popcount((x & -x) - 1):
; vcnt.8 per byte, then vpaddl.u8 and vpaddl.u16 to reach 32-bit lanes.
222define void @test_v4i32(ptr %p) {
223; CHECK-LABEL: test_v4i32:
224; CHECK:       @ %bb.0:
225; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
226; CHECK-NEXT:    vmov.i32 q10, #0x1
227; CHECK-NEXT:    vneg.s32 q9, q8
228; CHECK-NEXT:    vand q8, q8, q9
229; CHECK-NEXT:    vsub.i32 q8, q8, q10
230; CHECK-NEXT:    vcnt.8 q8, q8
231; CHECK-NEXT:    vpaddl.u8 q8, q8
232; CHECK-NEXT:    vpaddl.u16 q8, q8
233; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
234; CHECK-NEXT:    bx lr
235  %a = load <4 x i32>, ptr %p
236  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
237  store <4 x i32> %tmp, ptr %p
238  ret void
239}
240
; cttz of a <1 x i64> (one d register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly negates by subtracting from a zeroed register
; (vsub.i64), ANDs with the input to isolate the lowest set bit, and adds an
; all-ones constant to subtract 1. The popcount is then vcnt.8 followed by
; three vpaddl steps (u8, u16, u32) that sum the byte counts into the 64-bit
; lane.
241define void @test_v1i64(ptr %p) {
242; CHECK-LABEL: test_v1i64:
243; CHECK:       @ %bb.0:
244; CHECK-NEXT:    vmov.i32 d16, #0x0
245; CHECK-NEXT:    vldr d17, [r0]
246; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
247; CHECK-NEXT:    vsub.i64 d16, d16, d17
248; CHECK-NEXT:    vand d16, d17, d16
249; CHECK-NEXT:    vadd.i64 d16, d16, d18
250; CHECK-NEXT:    vcnt.8 d16, d16
251; CHECK-NEXT:    vpaddl.u8 d16, d16
252; CHECK-NEXT:    vpaddl.u16 d16, d16
253; CHECK-NEXT:    vpaddl.u32 d16, d16
254; CHECK-NEXT:    vstr d16, [r0]
255; CHECK-NEXT:    bx lr
256  %a = load <1 x i64>, ptr %p
257  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
258  store <1 x i64> %tmp, ptr %p
259  ret void
260}
261
; cttz of a <2 x i64> (one q register) loaded from %p and stored back in
; place; i1 false (zero input defined).
; The expected assembly is the q-register form of the 64-bit sequence:
; (x & (0 - x)) + all-ones, then vcnt.8 and three vpaddl steps (u8, u16, u32)
; to sum the byte counts into 64-bit lanes.
262define void @test_v2i64(ptr %p) {
263; CHECK-LABEL: test_v2i64:
264; CHECK:       @ %bb.0:
265; CHECK-NEXT:    vmov.i32 q8, #0x0
266; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
267; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
268; CHECK-NEXT:    vsub.i64 q8, q8, q9
269; CHECK-NEXT:    vand q8, q9, q8
270; CHECK-NEXT:    vadd.i64 q8, q8, q10
271; CHECK-NEXT:    vcnt.8 q8, q8
272; CHECK-NEXT:    vpaddl.u8 q8, q8
273; CHECK-NEXT:    vpaddl.u16 q8, q8
274; CHECK-NEXT:    vpaddl.u32 q8, q8
275; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
276; CHECK-NEXT:    bx lr
277  %a = load <2 x i64>, ptr %p
278  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
279  store <2 x i64> %tmp, ptr %p
280  ret void
281}
282
283;------------------------------------------------------------------------------
284
; cttz of a <1 x i8> loaded from %p and stored back in place; i1 true, so the
; result for a zero input need not be defined.
; The expected assembly is scalar rbit + clz on the loaded byte; unlike
; test_v1i8, no `orr` is expected before the bit reversal.
285define void @test_v1i8_zero_undef(ptr %p) {
286; CHECK-LABEL: test_v1i8_zero_undef:
287; CHECK:       @ %bb.0:
288; CHECK-NEXT:    ldrb r1, [r0]
289; CHECK-NEXT:    rbit r1, r1
290; CHECK-NEXT:    clz r1, r1
291; CHECK-NEXT:    strb r1, [r0]
292; CHECK-NEXT:    bx lr
293  %a = load <1 x i8>, ptr %p
294  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
295  store <1 x i8> %tmp, ptr %p
296  ret void
297}
298
; cttz of a <2 x i8> loaded from %p and stored back in place; i1 true (result
; for a zero input need not be defined).
; The expected assembly widens the bytes to 32-bit lanes, isolates the lowest
; set bit with x & -x, and computes 31 - clz. It matches test_v2i8 except that
; no vorr is expected. Each result lane is stored with its own strb.
299define void @test_v2i8_zero_undef(ptr %p) {
300; CHECK-LABEL: test_v2i8_zero_undef:
301; CHECK:       @ %bb.0:
302; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
303; CHECK-NEXT:    vmovl.u8 q8, d16
304; CHECK-NEXT:    vmovl.u16 q8, d16
305; CHECK-NEXT:    vneg.s32 d18, d16
306; CHECK-NEXT:    vand d16, d16, d18
307; CHECK-NEXT:    vmov.i32 d17, #0x1f
308; CHECK-NEXT:    vclz.i32 d16, d16
309; CHECK-NEXT:    vsub.i32 d16, d17, d16
310; CHECK-NEXT:    vmov.32 r1, d16[1]
311; CHECK-NEXT:    vmov.32 r2, d16[0]
312; CHECK-NEXT:    strb r1, [r0, #1]
313; CHECK-NEXT:    strb r2, [r0]
314; CHECK-NEXT:    bx lr
315  %a = load <2 x i8>, ptr %p
316  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
317  store <2 x i8> %tmp, ptr %p
318  ret void
319}
320
; cttz of a <4 x i8> loaded from %p and stored back in place; i1 true (result
; for a zero input need not be defined).
; The expected assembly widens the bytes to 16-bit lanes, isolates the lowest
; set bit with x & -x, and computes 15 - clz. It matches test_v4i8 except that
; no vorr is expected. vuzp.8 gathers the low byte of each lane ahead of the
; 32-bit lane store.
321define void @test_v4i8_zero_undef(ptr %p) {
322; CHECK-LABEL: test_v4i8_zero_undef:
323; CHECK:       @ %bb.0:
324; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
325; CHECK-NEXT:    vmovl.u8 q8, d16
326; CHECK-NEXT:    vneg.s16 d18, d16
327; CHECK-NEXT:    vand d16, d16, d18
328; CHECK-NEXT:    vmov.i16 d17, #0xf
329; CHECK-NEXT:    vclz.i16 d16, d16
330; CHECK-NEXT:    vsub.i16 d16, d17, d16
331; CHECK-NEXT:    vuzp.8 d16, d17
332; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
333; CHECK-NEXT:    bx lr
334  %a = load <4 x i8>, ptr %p
335  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
336  store <4 x i8> %tmp, ptr %p
337  ret void
338}
339
; cttz of a <8 x i8> (one d register) loaded from %p and stored back in place;
; i1 true (result for a zero input need not be defined).
; The expected assembly is the same popcount((x & -x) - 1) sequence with
; vcnt.8 that test_v8i8 expects.
340define void @test_v8i8_zero_undef(ptr %p) {
341; CHECK-LABEL: test_v8i8_zero_undef:
342; CHECK:       @ %bb.0:
343; CHECK-NEXT:    vldr d16, [r0]
344; CHECK-NEXT:    vmov.i8 d18, #0x1
345; CHECK-NEXT:    vneg.s8 d17, d16
346; CHECK-NEXT:    vand d16, d16, d17
347; CHECK-NEXT:    vsub.i8 d16, d16, d18
348; CHECK-NEXT:    vcnt.8 d16, d16
349; CHECK-NEXT:    vstr d16, [r0]
350; CHECK-NEXT:    bx lr
351  %a = load <8 x i8>, ptr %p
352  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
353  store <8 x i8> %tmp, ptr %p
354  ret void
355}
356
; cttz of a <16 x i8> (one q register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly is the same q-register popcount((x & -x) - 1) sequence
; with vcnt.8 that test_v16i8 expects.
357define void @test_v16i8_zero_undef(ptr %p) {
358; CHECK-LABEL: test_v16i8_zero_undef:
359; CHECK:       @ %bb.0:
360; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
361; CHECK-NEXT:    vmov.i8 q10, #0x1
362; CHECK-NEXT:    vneg.s8 q9, q8
363; CHECK-NEXT:    vand q8, q8, q9
364; CHECK-NEXT:    vsub.i8 q8, q8, q10
365; CHECK-NEXT:    vcnt.8 q8, q8
366; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
367; CHECK-NEXT:    bx lr
368  %a = load <16 x i8>, ptr %p
369  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
370  store <16 x i8> %tmp, ptr %p
371  ret void
372}
373
; cttz of a <1 x i16> loaded from %p and stored back in place; i1 true (result
; for a zero input need not be defined).
; The expected assembly is scalar rbit + clz on the loaded halfword; unlike
; test_v1i16, no `orr` is expected before the bit reversal.
374define void @test_v1i16_zero_undef(ptr %p) {
375; CHECK-LABEL: test_v1i16_zero_undef:
376; CHECK:       @ %bb.0:
377; CHECK-NEXT:    ldrh r1, [r0]
378; CHECK-NEXT:    rbit r1, r1
379; CHECK-NEXT:    clz r1, r1
380; CHECK-NEXT:    strh r1, [r0]
381; CHECK-NEXT:    bx lr
382  %a = load <1 x i16>, ptr %p
383  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
384  store <1 x i16> %tmp, ptr %p
385  ret void
386}
387
; cttz of a <2 x i16> loaded from %p and stored back in place; i1 true (result
; for a zero input need not be defined).
; The expected assembly widens the halfwords to 32-bit lanes, isolates the
; lowest set bit with x & -x, and computes 31 - clz. It matches test_v2i16
; except that no vorr is expected. vuzp.16 gathers the low halfword of each
; lane ahead of the 32-bit lane store.
388define void @test_v2i16_zero_undef(ptr %p) {
389; CHECK-LABEL: test_v2i16_zero_undef:
390; CHECK:       @ %bb.0:
391; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
392; CHECK-NEXT:    vmovl.u16 q8, d16
393; CHECK-NEXT:    vneg.s32 d18, d16
394; CHECK-NEXT:    vand d16, d16, d18
395; CHECK-NEXT:    vmov.i32 d17, #0x1f
396; CHECK-NEXT:    vclz.i32 d16, d16
397; CHECK-NEXT:    vsub.i32 d16, d17, d16
398; CHECK-NEXT:    vuzp.16 d16, d17
399; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
400; CHECK-NEXT:    bx lr
401  %a = load <2 x i16>, ptr %p
402  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
403  store <2 x i16> %tmp, ptr %p
404  ret void
405}
406
; cttz of a <4 x i16> (one d register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly isolates the lowest set bit with x & -x and computes
; 15 - vclz.i16, rather than the vcnt/vpaddl popcount sequence expected by
; test_v4i16.
407define void @test_v4i16_zero_undef(ptr %p) {
408; CHECK-LABEL: test_v4i16_zero_undef:
409; CHECK:       @ %bb.0:
410; CHECK-NEXT:    vldr d16, [r0]
411; CHECK-NEXT:    vneg.s16 d17, d16
412; CHECK-NEXT:    vand d16, d16, d17
413; CHECK-NEXT:    vmov.i16 d17, #0xf
414; CHECK-NEXT:    vclz.i16 d16, d16
415; CHECK-NEXT:    vsub.i16 d16, d17, d16
416; CHECK-NEXT:    vstr d16, [r0]
417; CHECK-NEXT:    bx lr
418  %a = load <4 x i16>, ptr %p
419  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
420  store <4 x i16> %tmp, ptr %p
421  ret void
422}
423
; cttz of a <8 x i16> (one q register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly isolates the lowest set bit with x & -x and computes
; 15 - vclz.i16 on q registers, rather than the vcnt/vpaddl popcount sequence
; expected by test_v8i16.
424define void @test_v8i16_zero_undef(ptr %p) {
425; CHECK-LABEL: test_v8i16_zero_undef:
426; CHECK:       @ %bb.0:
427; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
428; CHECK-NEXT:    vneg.s16 q9, q8
429; CHECK-NEXT:    vand q8, q8, q9
430; CHECK-NEXT:    vmov.i16 q9, #0xf
431; CHECK-NEXT:    vclz.i16 q8, q8
432; CHECK-NEXT:    vsub.i16 q8, q9, q8
433; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
434; CHECK-NEXT:    bx lr
435  %a = load <8 x i16>, ptr %p
436  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
437  store <8 x i16> %tmp, ptr %p
438  ret void
439}
440
; cttz of a <1 x i32> loaded from %p and stored back in place; i1 true (result
; for a zero input need not be defined).
; The expected assembly is the same scalar rbit + clz sequence that
; test_v1i32 expects.
441define void @test_v1i32_zero_undef(ptr %p) {
442; CHECK-LABEL: test_v1i32_zero_undef:
443; CHECK:       @ %bb.0:
444; CHECK-NEXT:    ldr r1, [r0]
445; CHECK-NEXT:    rbit r1, r1
446; CHECK-NEXT:    clz r1, r1
447; CHECK-NEXT:    str r1, [r0]
448; CHECK-NEXT:    bx lr
449  %a = load <1 x i32>, ptr %p
450  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
451  store <1 x i32> %tmp, ptr %p
452  ret void
453}
454
; cttz of a <2 x i32> (one d register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly isolates the lowest set bit with x & -x and computes
; 31 - vclz.i32, rather than the vcnt/vpaddl popcount sequence expected by
; test_v2i32.
455define void @test_v2i32_zero_undef(ptr %p) {
456; CHECK-LABEL: test_v2i32_zero_undef:
457; CHECK:       @ %bb.0:
458; CHECK-NEXT:    vldr d16, [r0]
459; CHECK-NEXT:    vneg.s32 d17, d16
460; CHECK-NEXT:    vand d16, d16, d17
461; CHECK-NEXT:    vmov.i32 d17, #0x1f
462; CHECK-NEXT:    vclz.i32 d16, d16
463; CHECK-NEXT:    vsub.i32 d16, d17, d16
464; CHECK-NEXT:    vstr d16, [r0]
465; CHECK-NEXT:    bx lr
466  %a = load <2 x i32>, ptr %p
467  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
468  store <2 x i32> %tmp, ptr %p
469  ret void
470}
471
; cttz of a <4 x i32> (one q register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly isolates the lowest set bit with x & -x and computes
; 31 - vclz.i32 on q registers, rather than the vcnt/vpaddl popcount sequence
; expected by test_v4i32.
472define void @test_v4i32_zero_undef(ptr %p) {
473; CHECK-LABEL: test_v4i32_zero_undef:
474; CHECK:       @ %bb.0:
475; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
476; CHECK-NEXT:    vneg.s32 q9, q8
477; CHECK-NEXT:    vand q8, q8, q9
478; CHECK-NEXT:    vmov.i32 q9, #0x1f
479; CHECK-NEXT:    vclz.i32 q8, q8
480; CHECK-NEXT:    vsub.i32 q8, q9, q8
481; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
482; CHECK-NEXT:    bx lr
483  %a = load <4 x i32>, ptr %p
484  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
485  store <4 x i32> %tmp, ptr %p
486  ret void
487}
488
; cttz of a <1 x i64> (one d register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly is the same sequence that test_v1i64 expects:
; (x & (0 - x)) + all-ones, then vcnt.8 and three vpaddl steps (u8, u16, u32)
; to sum the byte counts into the 64-bit lane.
489define void @test_v1i64_zero_undef(ptr %p) {
490; CHECK-LABEL: test_v1i64_zero_undef:
491; CHECK:       @ %bb.0:
492; CHECK-NEXT:    vmov.i32 d16, #0x0
493; CHECK-NEXT:    vldr d17, [r0]
494; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
495; CHECK-NEXT:    vsub.i64 d16, d16, d17
496; CHECK-NEXT:    vand d16, d17, d16
497; CHECK-NEXT:    vadd.i64 d16, d16, d18
498; CHECK-NEXT:    vcnt.8 d16, d16
499; CHECK-NEXT:    vpaddl.u8 d16, d16
500; CHECK-NEXT:    vpaddl.u16 d16, d16
501; CHECK-NEXT:    vpaddl.u32 d16, d16
502; CHECK-NEXT:    vstr d16, [r0]
503; CHECK-NEXT:    bx lr
504  %a = load <1 x i64>, ptr %p
505  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
506  store <1 x i64> %tmp, ptr %p
507  ret void
508}
509
; cttz of a <2 x i64> (one q register) loaded from %p and stored back in
; place; i1 true (result for a zero input need not be defined).
; The expected assembly is the same q-register sequence that test_v2i64
; expects: (x & (0 - x)) + all-ones, then vcnt.8 and three vpaddl steps
; (u8, u16, u32) to sum the byte counts into 64-bit lanes.
510define void @test_v2i64_zero_undef(ptr %p) {
511; CHECK-LABEL: test_v2i64_zero_undef:
512; CHECK:       @ %bb.0:
513; CHECK-NEXT:    vmov.i32 q8, #0x0
514; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
515; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
516; CHECK-NEXT:    vsub.i64 q8, q8, q9
517; CHECK-NEXT:    vand q8, q9, q8
518; CHECK-NEXT:    vadd.i64 q8, q8, q10
519; CHECK-NEXT:    vcnt.8 q8, q8
520; CHECK-NEXT:    vpaddl.u8 q8, q8
521; CHECK-NEXT:    vpaddl.u16 q8, q8
522; CHECK-NEXT:    vpaddl.u32 q8, q8
523; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
524; CHECK-NEXT:    bx lr
525  %a = load <2 x i64>, ptr %p
526  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
527  store <2 x i64> %tmp, ptr %p
528  ret void
529}
530