; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s -check-prefix=RV64

; WITH VSCALE RANGE

define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-LABEL: ctz_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT:    vid.v v10
; RV32-NEXT:    li a1, -1
; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v8, 0
; RV32-NEXT:    srli a0, a0, 1
; RV32-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT:    vmv.v.x v8, a0
; RV32-NEXT:    vmadd.vx v10, a1, v8
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    slli a0, a0, 16
; RV32-NEXT:    srli a0, a0, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT:    vid.v v10
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    srli a0, a0, 1
; RV64-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vmadd.vx v10, a1, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v10, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    slli a0, a0, 48
; RV64-NEXT:    srli a0, a0, 48
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
  ret i32 %res
}

; NO VSCALE RANGE

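; Without a vscale_range attribute the trailing-zero count cannot be bounded at
; compile time, so the index vector is built at e64 to match the i64 result; on
; RV32 the 64-bit element count is computed via a __muldi3 libcall.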
define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV32-LABEL: ctz_nxv8i1_no_range:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
; RV32-NEXT:    addi a0, sp, 32
; RV32-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    li a2, 8
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __muldi3
; RV32-NEXT:    sw a0, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a2), zero
; RV32-NEXT:    vid.v v8
; RV32-NEXT:    li a2, -1
; RV32-NEXT:    addi a3, sp, 32
; RV32-NEXT:    vl2r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vmsne.vi v0, v24, 0
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vmadd.vx v8, a2, v16
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vmerge.vim v16, v16, -1, v0
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vredmaxu.vs v8, v8, v8
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a2
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    vmv.x.s a4, v8
; RV32-NEXT:    sub a1, a1, a4
; RV32-NEXT:    sub a1, a1, a2
; RV32-NEXT:    sub a0, a0, a3
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 1
; RV32-NEXT:    add sp, sp, a2
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv8i1_no_range:
; RV64:       # %bb.0:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vid.v v16
; RV64-NEXT:    li a1, -1
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vmsne.vi v0, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.v.x v8, a0
; RV64-NEXT:    vmadd.vx v16, a1, v8
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
; RV64-NEXT:    vredmaxu.vs v8, v8, v8
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    sub a0, a0, a1
; RV64-NEXT:    ret
  %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16> %a, i1 0)
  ret i64 %res
}

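; For i1 vector sources the intrinsic lowers to a single vfirst.m. The
; non-poison variants branch to substitute the total element count when no
; element is set; the poison variants return the vfirst.m result directly.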
define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB2_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 1
; RV32-NEXT:  .LBB2_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 1
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
  ret i32 %res
}

define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; RV32-LABEL: ctz_nxv16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_nxv16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
  ret i32 %res
}

define i32 @ctz_v16i1(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    bgez a0, .LBB4_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 16
; RV32-NEXT:  .LBB4_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    bgez a0, .LBB4_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 16
; RV64-NEXT:  .LBB4_2:
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
  ret i32 %res
}

define i32 @ctz_v16i1_poison(<16 x i1> %pg, <16 x i1> %a) {
; RV32-LABEL: ctz_v16i1_poison:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT:    vfirst.m a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v16i1_poison:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT:    vfirst.m a0, v8
; RV64-NEXT:    ret
  %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
  ret i32 %res
}

define i16 @ctz_v8i1_i16_ret(<8 x i1> %a) {
; RV32-LABEL: ctz_v8i1_i16_ret:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vfirst.m a0, v0
; RV32-NEXT:    bgez a0, .LBB6_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 8
; RV32-NEXT:  .LBB6_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: ctz_v8i1_i16_ret:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vfirst.m a0, v0
; RV64-NEXT:    bgez a0, .LBB6_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 8
; RV64-NEXT:  .LBB6_2:
; RV64-NEXT:    ret
  %res = call i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1> %a, i1 0)
  ret i16 %res
}

declare i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1>, i1)

attributes #0 = { vscale_range(2,1024) }