; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -verify-machineinstrs -mattr=+mve %s -o - | FileCheck %s

; Code generation tests for the llvm.cttz (count trailing zeros) intrinsic on
; 128-bit vectors when targeting MVE.
;
; Each element type is tested twice: the "_0_t" functions pass i1 0 as the
; second intrinsic operand and the "_1_t" functions pass i1 1. The expected
; assembly below is the same for both variants of a given type.

; <2 x i64>, second operand i1 0. The expected output processes each 64-bit
; lane in core registers (rbit + clz on the 32-bit halves, selecting between
; them on whether the low half is zero) and loads a zero constant into the
; upper word of each result lane (s1 and s3).
define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_0_t(<2 x i64> %src){
; CHECK-LABEL: cttz_2i64_0_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    rbit r2, r0
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    add.w r1, r1, #32
; CHECK-NEXT:    it ne
; CHECK-NEXT:    clzne r1, r2
; CHECK-NEXT:    vmov s2, r1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vldr s1, .LCPI0_0
; CHECK-NEXT:    vmov.f32 s3, s1
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    rbit r2, r0
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    add.w r1, r1, #32
; CHECK-NEXT:    it ne
; CHECK-NEXT:    clzne r1, r2
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI0_0:
; CHECK-NEXT:    .long 0x00000000 @ float 0
entry:
  %0 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 0)
  ret <2 x i64> %0
}

; <4 x i32>, second operand i1 0. Expected as a vector bit-reverse (vbrsr.32
; with a count of 32) followed by a vector count-leading-zeros.
define arm_aapcs_vfpcc <4 x i32> @cttz_4i32_0_t(<4 x i32> %src){
; CHECK-LABEL: cttz_4i32_0_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #32
; CHECK-NEXT:    vbrsr.32 q0, q0, r0
; CHECK-NEXT:    vclz.i32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 0)
  ret <4 x i32> %0
}

; <8 x i16>, second operand i1 0. Same shape as the i32 case with a 16-bit
; element size and a reverse count of 16.
define arm_aapcs_vfpcc <8 x i16> @cttz_8i16_0_t(<8 x i16> %src){
; CHECK-LABEL: cttz_8i16_0_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vbrsr.16 q0, q0, r0
; CHECK-NEXT:    vclz.i16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 0)
  ret <8 x i16> %0
}

; <16 x i8>, second operand i1 0. Same shape as the i32 case with an 8-bit
; element size and a reverse count of 8.
define arm_aapcs_vfpcc <16 x i8> @cttz_16i8_0_t(<16 x i8> %src) {
; CHECK-LABEL: cttz_16i8_0_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #8
; CHECK-NEXT:    vbrsr.8 q0, q0, r0
; CHECK-NEXT:    vclz.i8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 0)
  ret <16 x i8> %0
}

; <2 x i64>, second operand i1 1. The expected instruction sequence matches
; cttz_2i64_0_t apart from the constant-pool label.
define arm_aapcs_vfpcc <2 x i64> @cttz_2i64_1_t(<2 x i64> %src){
; CHECK-LABEL: cttz_2i64_1_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    rbit r2, r0
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    add.w r1, r1, #32
; CHECK-NEXT:    it ne
; CHECK-NEXT:    clzne r1, r2
; CHECK-NEXT:    vmov s2, r1
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vldr s1, .LCPI4_0
; CHECK-NEXT:    vmov.f32 s3, s1
; CHECK-NEXT:    rbit r1, r1
; CHECK-NEXT:    rbit r2, r0
; CHECK-NEXT:    clz r1, r1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    add.w r1, r1, #32
; CHECK-NEXT:    it ne
; CHECK-NEXT:    clzne r1, r2
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 2
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI4_0:
; CHECK-NEXT:    .long 0x00000000 @ float 0
entry:
  %0 = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 1)
  ret <2 x i64> %0
}

; <4 x i32>, second operand i1 1. Expected output matches cttz_4i32_0_t.
define arm_aapcs_vfpcc <4 x i32> @cttz_4i32_1_t(<4 x i32> %src){
; CHECK-LABEL: cttz_4i32_1_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #32
; CHECK-NEXT:    vbrsr.32 q0, q0, r0
; CHECK-NEXT:    vclz.i32 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 1)
  ret <4 x i32> %0
}

; <8 x i16>, second operand i1 1. Expected output matches cttz_8i16_0_t.
define arm_aapcs_vfpcc <8 x i16> @cttz_8i16_1_t(<8 x i16> %src){
; CHECK-LABEL: cttz_8i16_1_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vbrsr.16 q0, q0, r0
; CHECK-NEXT:    vclz.i16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 1)
  ret <8 x i16> %0
}

; <16 x i8>, second operand i1 1. Expected output matches cttz_16i8_0_t.
define arm_aapcs_vfpcc <16 x i8> @cttz_16i8_1_t(<16 x i8> %src) {
; CHECK-LABEL: cttz_16i8_1_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #8
; CHECK-NEXT:    vbrsr.8 q0, q0, r0
; CHECK-NEXT:    vclz.i8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 1)
  ret <16 x i8> %0
}


; Intrinsic declarations used by the tests above.
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)