; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple armv7-linux-gnueabihf -mattr=+neon | FileCheck %s

; This test checks the @llvm.cttz.* intrinsics for vectors.
;
; Each function loads a vector from %p, applies cttz, and stores the result
; back to %p. The file has two sections:
;   * test_<type>            - second intrinsic operand is `i1 false`, so a
;                              zero element has a defined result.
;   * test_<type>_zero_undef - second operand is `i1 true`, so the result for
;                              a zero element is unspecified (see the LangRef
;                              entry for llvm.cttz.*).
; The expected assembly below is generated; regenerate it with
; utils/update_llc_test_checks.py instead of editing it by hand.

declare <1 x i8> @llvm.cttz.v1i8(<1 x i8>, i1)
declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1)
declare <4 x i8> @llvm.cttz.v4i8(<4 x i8>, i1)
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

declare <1 x i16> @llvm.cttz.v1i16(<1 x i16>, i1)
declare <2 x i16> @llvm.cttz.v2i16(<2 x i16>, i1)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

declare <1 x i32> @llvm.cttz.v1i32(<1 x i32>, i1)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

;------------------------------------------------------------------------------
; Section 1: cttz with `i1 false` (zero input has a defined result).

; <1 x i8>: expected code is scalar (rbit + clz). The `orr #256` sets the bit
; just above the 8-bit element, presumably so a zero byte yields 8.
define void @test_v1i8(ptr %p) {
; CHECK-LABEL: test_v1i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    orr r1, r1, #256
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i8>, ptr %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 false)
  store <1 x i8> %tmp, ptr %p
  ret void
}

; <2 x i8>: expected code widens the elements to 32 bits and stores the two
; result bytes individually.
define void @test_v2i8(ptr %p) {
; CHECK-LABEL: test_v2i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vorr.i32 d16, #0x100
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vmov.32 r1, d16[1]
; CHECK-NEXT:    vmov.32 r2, d16[0]
; CHECK-NEXT:    strb r1, [r0, #1]
; CHECK-NEXT:    strb r2, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i8>, ptr %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 false)
  store <2 x i8> %tmp, ptr %p
  ret void
}

; <4 x i8>: expected code widens the elements to 16 bits and narrows the
; result with vuzp.8.
define void @test_v4i8(ptr %p) {
; CHECK-LABEL: test_v4i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vorr.i16 d16, #0x100
; CHECK-NEXT:    vneg.s16 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i16 d17, #0xf
; CHECK-NEXT:    vclz.i16 d16, d16
; CHECK-NEXT:    vsub.i16 d16, d17, d16
; CHECK-NEXT:    vuzp.8 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <4 x i8>, ptr %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 false)
  store <4 x i8> %tmp, ptr %p
  ret void
}

; <8 x i8>: 64-bit D-register vector; expected code isolates the lowest set
; bit (x & -x), subtracts 1 and counts bits with vcnt.8.
define void @test_v8i8(ptr %p) {
; CHECK-LABEL: test_v8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i8 d18, #0x1
; CHECK-NEXT:    vneg.s8 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i8 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i8>, ptr %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 false)
  store <8 x i8> %tmp, ptr %p
  ret void
}

; <16 x i8>: 128-bit Q-register form of the <8 x i8> sequence.
define void @test_v16i8(ptr %p) {
; CHECK-LABEL: test_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i8 q10, #0x1
; CHECK-NEXT:    vneg.s8 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i8 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <16 x i8>, ptr %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %tmp, ptr %p
  ret void
}

; <1 x i16>: expected code is scalar; `orr #65536` sets the bit just above the
; 16-bit element, presumably so a zero halfword yields 16.
define void @test_v1i16(ptr %p) {
; CHECK-LABEL: test_v1i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrh r1, [r0]
; CHECK-NEXT:    orr r1, r1, #65536
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i16>, ptr %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 false)
  store <1 x i16> %tmp, ptr %p
  ret void
}

; <2 x i16>: expected code widens the elements to 32 bits and narrows the
; result with vuzp.16.
define void @test_v2i16(ptr %p) {
; CHECK-LABEL: test_v2i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vorr.i32 d16, #0x10000
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <2 x i16>, ptr %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 false)
  store <2 x i16> %tmp, ptr %p
  ret void
}

; <4 x i16>: D-register vector; byte popcounts are combined into 16-bit lanes
; with one vpaddl step.
define void @test_v4i16(ptr %p) {
; CHECK-LABEL: test_v4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i16 d18, #0x1
; CHECK-NEXT:    vneg.s16 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i16 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i16>, ptr %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 false)
  store <4 x i16> %tmp, ptr %p
  ret void
}

; <8 x i16>: Q-register form of the <4 x i16> sequence.
define void @test_v8i16(ptr %p) {
; CHECK-LABEL: test_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i16 q10, #0x1
; CHECK-NEXT:    vneg.s16 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i16 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i16>, ptr %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %tmp, ptr %p
  ret void
}

; <1 x i32>: expected code is scalar rbit + clz with no extra masking.
define void @test_v1i32(ptr %p) {
; CHECK-LABEL: test_v1i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i32>, ptr %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 false)
  store <1 x i32> %tmp, ptr %p
  ret void
}

; <2 x i32>: D-register vector; byte popcounts are combined into 32-bit lanes
; with two vpaddl steps.
define void @test_v2i32(ptr %p) {
; CHECK-LABEL: test_v2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i32 d18, #0x1
; CHECK-NEXT:    vneg.s32 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i32 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i32>, ptr %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
  store <2 x i32> %tmp, ptr %p
  ret void
}

; <4 x i32>: Q-register form of the <2 x i32> sequence.
define void @test_v4i32(ptr %p) {
; CHECK-LABEL: test_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i32 q10, #0x1
; CHECK-NEXT:    vneg.s32 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i32 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i32>, ptr %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %tmp, ptr %p
  ret void
}

; <1 x i64>: D-register vector; negation is expressed as 0 - x (vsub.i64) and
; the "subtract 1" as adding an all-ones constant; three vpaddl steps.
define void @test_v1i64(ptr %p) {
; CHECK-LABEL: test_v1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x0
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 d16, d16, d17
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vadd.i64 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vpaddl.u32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i64>, ptr %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 false)
  store <1 x i64> %tmp, ptr %p
  ret void
}

; <2 x i64>: Q-register form of the <1 x i64> sequence.
define void @test_v2i64(ptr %p) {
; CHECK-LABEL: test_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 q8, q8, q9
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vadd.i64 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vpaddl.u32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i64>, ptr %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %tmp, ptr %p
  ret void
}

;------------------------------------------------------------------------------
; Section 2: cttz with `i1 true` (result for a zero input is unspecified).

; <1 x i8>, zero-undef: same scalar sequence as test_v1i8 without the orr.
define void @test_v1i8_zero_undef(ptr %p) {
; CHECK-LABEL: test_v1i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i8>, ptr %p
  %tmp = call <1 x i8> @llvm.cttz.v1i8(<1 x i8> %a, i1 true)
  store <1 x i8> %tmp, ptr %p
  ret void
}

; <2 x i8>, zero-undef: same as test_v2i8 without the vorr.
define void @test_v2i8_zero_undef(ptr %p) {
; CHECK-LABEL: test_v2i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16[0]}, [r0:16]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vmov.32 r1, d16[1]
; CHECK-NEXT:    vmov.32 r2, d16[0]
; CHECK-NEXT:    strb r1, [r0, #1]
; CHECK-NEXT:    strb r2, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i8>, ptr %p
  %tmp = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
  store <2 x i8> %tmp, ptr %p
  ret void
}

; <4 x i8>, zero-undef: same as test_v4i8 without the vorr.
define void @test_v4i8_zero_undef(ptr %p) {
; CHECK-LABEL: test_v4i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u8 q8, d16
; CHECK-NEXT:    vneg.s16 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i16 d17, #0xf
; CHECK-NEXT:    vclz.i16 d16, d16
; CHECK-NEXT:    vsub.i16 d16, d17, d16
; CHECK-NEXT:    vuzp.8 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <4 x i8>, ptr %p
  %tmp = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
  store <4 x i8> %tmp, ptr %p
  ret void
}

; <8 x i8>, zero-undef: expected code is identical to test_v8i8.
define void @test_v8i8_zero_undef(ptr %p) {
; CHECK-LABEL: test_v8i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vmov.i8 d18, #0x1
; CHECK-NEXT:    vneg.s8 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vsub.i8 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i8>, ptr %p
  %tmp = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
  store <8 x i8> %tmp, ptr %p
  ret void
}

; <16 x i8>, zero-undef: expected code is identical to test_v16i8.
define void @test_v16i8_zero_undef(ptr %p) {
; CHECK-LABEL: test_v16i8_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov.i8 q10, #0x1
; CHECK-NEXT:    vneg.s8 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vsub.i8 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <16 x i8>, ptr %p
  %tmp = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %tmp, ptr %p
  ret void
}

; <1 x i16>, zero-undef: same scalar sequence as test_v1i16 without the orr.
define void @test_v1i16_zero_undef(ptr %p) {
; CHECK-LABEL: test_v1i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrh r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i16>, ptr %p
  %tmp = call <1 x i16> @llvm.cttz.v1i16(<1 x i16> %a, i1 true)
  store <1 x i16> %tmp, ptr %p
  ret void
}

; <2 x i16>, zero-undef: same as test_v2i16 without the vorr.
define void @test_v2i16_zero_undef(ptr %p) {
; CHECK-LABEL: test_v2i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    vmovl.u16 q8, d16
; CHECK-NEXT:    vneg.s32 d18, d16
; CHECK-NEXT:    vand d16, d16, d18
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vuzp.16 d16, d17
; CHECK-NEXT:    vst1.32 {d16[0]}, [r0:32]
; CHECK-NEXT:    bx lr
  %a = load <2 x i16>, ptr %p
  %tmp = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
  store <2 x i16> %tmp, ptr %p
  ret void
}

; <4 x i16>, zero-undef: expected code uses vclz.i16 (15 - clz(x & -x))
; instead of the vcnt-based sequence in test_v4i16.
define void @test_v4i16_zero_undef(ptr %p) {
; CHECK-LABEL: test_v4i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vneg.s16 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vmov.i16 d17, #0xf
; CHECK-NEXT:    vclz.i16 d16, d16
; CHECK-NEXT:    vsub.i16 d16, d17, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i16>, ptr %p
  %tmp = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
  store <4 x i16> %tmp, ptr %p
  ret void
}

; <8 x i16>, zero-undef: Q-register form of the <4 x i16> zero-undef sequence.
define void @test_v8i16_zero_undef(ptr %p) {
; CHECK-LABEL: test_v8i16_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vneg.s16 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vmov.i16 q9, #0xf
; CHECK-NEXT:    vclz.i16 q8, q8
; CHECK-NEXT:    vsub.i16 q8, q9, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <8 x i16>, ptr %p
  %tmp = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %tmp, ptr %p
  ret void
}

; <1 x i32>, zero-undef: expected code is identical to test_v1i32.
define void @test_v1i32_zero_undef(ptr %p) {
; CHECK-LABEL: test_v1i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldr r1, [r0]
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i32>, ptr %p
  %tmp = call <1 x i32> @llvm.cttz.v1i32(<1 x i32> %a, i1 true)
  store <1 x i32> %tmp, ptr %p
  ret void
}

; <2 x i32>, zero-undef: expected code uses vclz.i32 (31 - clz(x & -x))
; instead of the vcnt-based sequence in test_v2i32.
define void @test_v2i32_zero_undef(ptr %p) {
; CHECK-LABEL: test_v2i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vneg.s32 d17, d16
; CHECK-NEXT:    vand d16, d16, d17
; CHECK-NEXT:    vmov.i32 d17, #0x1f
; CHECK-NEXT:    vclz.i32 d16, d16
; CHECK-NEXT:    vsub.i32 d16, d17, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i32>, ptr %p
  %tmp = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
  store <2 x i32> %tmp, ptr %p
  ret void
}

; <4 x i32>, zero-undef: Q-register form of the <2 x i32> zero-undef sequence.
define void @test_v4i32_zero_undef(ptr %p) {
; CHECK-LABEL: test_v4i32_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vneg.s32 q9, q8
; CHECK-NEXT:    vand q8, q8, q9
; CHECK-NEXT:    vmov.i32 q9, #0x1f
; CHECK-NEXT:    vclz.i32 q8, q8
; CHECK-NEXT:    vsub.i32 q8, q9, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <4 x i32>, ptr %p
  %tmp = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %tmp, ptr %p
  ret void
}

; <1 x i64>, zero-undef: expected code is identical to test_v1i64.
define void @test_v1i64_zero_undef(ptr %p) {
; CHECK-LABEL: test_v1i64_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x0
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vmov.i64 d18, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 d16, d16, d17
; CHECK-NEXT:    vand d16, d17, d16
; CHECK-NEXT:    vadd.i64 d16, d16, d18
; CHECK-NEXT:    vcnt.8 d16, d16
; CHECK-NEXT:    vpaddl.u8 d16, d16
; CHECK-NEXT:    vpaddl.u16 d16, d16
; CHECK-NEXT:    vpaddl.u32 d16, d16
; CHECK-NEXT:    vstr d16, [r0]
; CHECK-NEXT:    bx lr
  %a = load <1 x i64>, ptr %p
  %tmp = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
  store <1 x i64> %tmp, ptr %p
  ret void
}

; <2 x i64>, zero-undef: expected code is identical to test_v2i64.
define void @test_v2i64_zero_undef(ptr %p) {
; CHECK-LABEL: test_v2i64_zero_undef:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0x0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vmov.i64 q10, #0xffffffffffffffff
; CHECK-NEXT:    vsub.i64 q8, q8, q9
; CHECK-NEXT:    vand q8, q9, q8
; CHECK-NEXT:    vadd.i64 q8, q8, q10
; CHECK-NEXT:    vcnt.8 q8, q8
; CHECK-NEXT:    vpaddl.u8 q8, q8
; CHECK-NEXT:    vpaddl.u16 q8, q8
; CHECK-NEXT:    vpaddl.u32 q8, q8
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    bx lr
  %a = load <2 x i64>, ptr %p
  %tmp = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %tmp, ptr %p
  ret void
}