1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \ 7; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 8; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \ 9; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 10 11declare <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8>, i1 immarg, <vscale x 1 x i1>, i32) 12 13define <vscale x 1 x i8> @vp_cttz_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 14; CHECK-LABEL: vp_cttz_nxv1i8: 15; CHECK: # %bb.0: 16; CHECK-NEXT: li a1, 1 17; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 18; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 19; CHECK-NEXT: li a0, 85 20; CHECK-NEXT: vnot.v v8, v8, v0.t 21; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 22; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 23; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 24; CHECK-NEXT: li a0, 51 25; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 26; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 27; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 28; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 29; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 30; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 31; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 32; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 33; CHECK-NEXT: ret 34; 35; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8: 36; CHECK-ZVBB: # %bb.0: 37; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 38; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 39; CHECK-ZVBB-NEXT: ret 40 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl) 41 ret <vscale x 1 x i8> %v 42} 43 44define <vscale x 1 x i8> @vp_cttz_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { 45; CHECK-LABEL: vp_cttz_nxv1i8_unmasked: 46; CHECK: # %bb.0: 47; CHECK-NEXT: li a1, 1 48; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 49; CHECK-NEXT: vnot.v v9, v8 50; CHECK-NEXT: vsub.vx v8, v8, a1 51; CHECK-NEXT: li a0, 85 52; CHECK-NEXT: vand.vv v8, v9, v8 53; CHECK-NEXT: vsrl.vi v9, v8, 1 54; CHECK-NEXT: vand.vx v9, v9, a0 55; CHECK-NEXT: li a0, 51 56; CHECK-NEXT: vsub.vv v8, v8, v9 57; CHECK-NEXT: vand.vx v9, v8, a0 58; CHECK-NEXT: vsrl.vi v8, v8, 2 59; CHECK-NEXT: vand.vx v8, v8, a0 60; CHECK-NEXT: vadd.vv v8, v9, v8 61; CHECK-NEXT: vsrl.vi v9, v8, 4 62; CHECK-NEXT: vadd.vv v8, v8, v9 63; CHECK-NEXT: vand.vi v8, v8, 15 64; CHECK-NEXT: ret 65; 66; CHECK-ZVBB-LABEL: vp_cttz_nxv1i8_unmasked: 67; CHECK-ZVBB: # %bb.0: 68; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 69; CHECK-ZVBB-NEXT: vctz.v v8, v8 70; CHECK-ZVBB-NEXT: ret 71 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl) 72 ret <vscale x 1 x i8> %v 73} 74 75declare <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8>, i1 immarg, <vscale x 2 x i1>, i32) 76 77define <vscale x 2 x i8> @vp_cttz_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 78; CHECK-LABEL: vp_cttz_nxv2i8: 79; CHECK: # %bb.0: 80; CHECK-NEXT: li a1, 1 81; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 82; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 83; CHECK-NEXT: li a0, 85 84; CHECK-NEXT: vnot.v v8, v8, v0.t 85; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 86; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 87; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 88; CHECK-NEXT: li a0, 51 89; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 90; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 91; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 92; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 93; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 94; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 95; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 96; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 97; CHECK-NEXT: ret 98; 99; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8: 100; CHECK-ZVBB: # %bb.0: 101; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 102; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 103; CHECK-ZVBB-NEXT: ret 104 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl) 105 ret <vscale x 2 x i8> %v 106} 107 108define <vscale x 2 x i8> @vp_cttz_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { 109; CHECK-LABEL: vp_cttz_nxv2i8_unmasked: 110; CHECK: # %bb.0: 111; CHECK-NEXT: li a1, 1 112; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 113; CHECK-NEXT: vnot.v v9, v8 114; CHECK-NEXT: vsub.vx v8, v8, a1 115; CHECK-NEXT: li a0, 85 116; CHECK-NEXT: vand.vv v8, v9, v8 117; CHECK-NEXT: vsrl.vi v9, v8, 1 118; CHECK-NEXT: vand.vx v9, v9, a0 119; CHECK-NEXT: li a0, 51 120; CHECK-NEXT: vsub.vv v8, v8, v9 121; CHECK-NEXT: vand.vx v9, v8, a0 122; CHECK-NEXT: vsrl.vi v8, v8, 2 123; CHECK-NEXT: vand.vx v8, v8, a0 124; CHECK-NEXT: vadd.vv v8, v9, v8 125; CHECK-NEXT: vsrl.vi v9, v8, 4 126; CHECK-NEXT: vadd.vv v8, v8, v9 127; CHECK-NEXT: vand.vi v8, v8, 15 128; CHECK-NEXT: ret 129; 130; CHECK-ZVBB-LABEL: vp_cttz_nxv2i8_unmasked: 131; CHECK-ZVBB: # %bb.0: 132; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 133; CHECK-ZVBB-NEXT: vctz.v v8, v8 134; CHECK-ZVBB-NEXT: ret 135 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl) 136 ret <vscale x 2 x i8> %v 137} 138 139declare <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8>, i1 immarg, <vscale x 4 x i1>, i32) 140 141define <vscale x 4 x i8> @vp_cttz_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 142; CHECK-LABEL: vp_cttz_nxv4i8: 143; CHECK: # %bb.0: 144; CHECK-NEXT: li a1, 1 145; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 146; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 147; CHECK-NEXT: li a0, 85 148; CHECK-NEXT: vnot.v v8, v8, v0.t 149; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 150; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 151; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 152; CHECK-NEXT: li a0, 51 153; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 154; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 155; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 156; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 157; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 158; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 159; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 160; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 161; CHECK-NEXT: ret 162; 163; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8: 164; CHECK-ZVBB: # %bb.0: 165; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 166; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 167; CHECK-ZVBB-NEXT: ret 168 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl) 169 ret <vscale x 4 x i8> %v 170} 171 172define <vscale x 4 x i8> @vp_cttz_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { 173; CHECK-LABEL: vp_cttz_nxv4i8_unmasked: 174; CHECK: # %bb.0: 175; CHECK-NEXT: li a1, 1 176; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 177; CHECK-NEXT: vnot.v v9, v8 178; CHECK-NEXT: vsub.vx v8, v8, a1 179; CHECK-NEXT: li a0, 85 180; CHECK-NEXT: vand.vv v8, v9, v8 181; CHECK-NEXT: vsrl.vi v9, v8, 1 182; CHECK-NEXT: vand.vx v9, v9, a0 183; CHECK-NEXT: li a0, 51 184; CHECK-NEXT: vsub.vv v8, v8, v9 185; CHECK-NEXT: vand.vx v9, v8, a0 186; CHECK-NEXT: vsrl.vi v8, v8, 2 187; CHECK-NEXT: vand.vx v8, v8, a0 188; CHECK-NEXT: vadd.vv v8, v9, v8 189; CHECK-NEXT: vsrl.vi v9, v8, 4 190; CHECK-NEXT: vadd.vv v8, v8, v9 191; CHECK-NEXT: vand.vi v8, v8, 15 192; CHECK-NEXT: ret 193; 194; CHECK-ZVBB-LABEL: vp_cttz_nxv4i8_unmasked: 195; CHECK-ZVBB: # %bb.0: 196; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 197; CHECK-ZVBB-NEXT: vctz.v v8, v8 198; CHECK-ZVBB-NEXT: ret 199 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl) 200 ret <vscale x 4 x i8> %v 201} 202 203declare <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8>, i1 immarg, <vscale x 8 x i1>, i32) 204 205define <vscale x 8 x i8> @vp_cttz_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 206; CHECK-LABEL: vp_cttz_nxv8i8: 207; CHECK: # %bb.0: 208; CHECK-NEXT: li a1, 1 209; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 210; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 211; CHECK-NEXT: li a0, 85 212; CHECK-NEXT: vnot.v v8, v8, v0.t 213; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 214; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 215; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 216; CHECK-NEXT: li a0, 51 217; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 218; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 219; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 220; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 221; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 222; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 223; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 224; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 225; CHECK-NEXT: ret 226; 227; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8: 228; CHECK-ZVBB: # %bb.0: 229; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 230; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 231; CHECK-ZVBB-NEXT: ret 232 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl) 233 ret <vscale x 8 x i8> %v 234} 235 236define <vscale x 8 x i8> @vp_cttz_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { 237; CHECK-LABEL: vp_cttz_nxv8i8_unmasked: 238; CHECK: # %bb.0: 239; CHECK-NEXT: li a1, 1 240; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 241; CHECK-NEXT: vnot.v v9, v8 242; CHECK-NEXT: vsub.vx v8, v8, a1 243; CHECK-NEXT: li a0, 85 244; CHECK-NEXT: vand.vv v8, v9, v8 245; CHECK-NEXT: vsrl.vi v9, v8, 1 246; CHECK-NEXT: vand.vx v9, v9, a0 247; CHECK-NEXT: li a0, 51 248; CHECK-NEXT: vsub.vv v8, v8, v9 249; CHECK-NEXT: vand.vx v9, v8, a0 250; CHECK-NEXT: vsrl.vi v8, v8, 2 251; CHECK-NEXT: vand.vx v8, v8, a0 252; CHECK-NEXT: vadd.vv v8, v9, v8 253; CHECK-NEXT: vsrl.vi v9, v8, 4 254; CHECK-NEXT: vadd.vv v8, v8, v9 255; CHECK-NEXT: vand.vi v8, v8, 15 256; CHECK-NEXT: ret 257; 258; CHECK-ZVBB-LABEL: vp_cttz_nxv8i8_unmasked: 259; CHECK-ZVBB: # %bb.0: 260; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 261; CHECK-ZVBB-NEXT: vctz.v v8, v8 262; CHECK-ZVBB-NEXT: ret 263 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl) 264 ret <vscale x 8 x i8> %v 265} 266 267declare <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8>, i1 immarg, <vscale x 16 x i1>, i32) 268 269define <vscale x 16 x i8> @vp_cttz_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 270; CHECK-LABEL: vp_cttz_nxv16i8: 271; CHECK: # %bb.0: 272; CHECK-NEXT: li a1, 1 273; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 274; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 275; CHECK-NEXT: li a0, 85 276; CHECK-NEXT: vnot.v v8, v8, v0.t 277; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 278; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 279; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 280; CHECK-NEXT: li a0, 51 281; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 282; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 283; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 284; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 285; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 286; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 287; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 288; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 289; CHECK-NEXT: ret 290; 291; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8: 292; CHECK-ZVBB: # %bb.0: 293; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 294; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 295; CHECK-ZVBB-NEXT: ret 296 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl) 297 ret <vscale x 16 x i8> %v 298} 299 300define <vscale x 16 x i8> @vp_cttz_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { 301; CHECK-LABEL: vp_cttz_nxv16i8_unmasked: 302; CHECK: # %bb.0: 303; CHECK-NEXT: li a1, 1 304; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 305; CHECK-NEXT: vnot.v v10, v8 306; CHECK-NEXT: vsub.vx v8, v8, a1 307; CHECK-NEXT: li a0, 85 308; CHECK-NEXT: vand.vv v8, v10, v8 309; CHECK-NEXT: vsrl.vi v10, v8, 1 310; CHECK-NEXT: vand.vx v10, v10, a0 311; CHECK-NEXT: li a0, 51 312; CHECK-NEXT: vsub.vv v8, v8, v10 313; CHECK-NEXT: vand.vx v10, v8, a0 314; CHECK-NEXT: vsrl.vi v8, v8, 2 315; CHECK-NEXT: vand.vx v8, v8, a0 316; CHECK-NEXT: vadd.vv v8, v10, v8 317; CHECK-NEXT: vsrl.vi v10, v8, 4 318; CHECK-NEXT: vadd.vv v8, v8, v10 319; CHECK-NEXT: vand.vi v8, v8, 15 320; CHECK-NEXT: ret 321; 322; CHECK-ZVBB-LABEL: vp_cttz_nxv16i8_unmasked: 323; CHECK-ZVBB: # %bb.0: 324; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 325; CHECK-ZVBB-NEXT: vctz.v v8, v8 326; CHECK-ZVBB-NEXT: ret 327 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl) 328 ret <vscale x 16 x i8> %v 329} 330 331declare <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8>, i1 immarg, <vscale x 32 x i1>, i32) 332 333define <vscale x 32 x i8> @vp_cttz_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 334; CHECK-LABEL: vp_cttz_nxv32i8: 335; CHECK: # %bb.0: 336; CHECK-NEXT: li a1, 1 337; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 338; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 339; CHECK-NEXT: li a0, 85 340; CHECK-NEXT: vnot.v v8, v8, v0.t 341; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 342; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 343; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 344; CHECK-NEXT: li a0, 51 345; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 346; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 347; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 348; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 349; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 350; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 351; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 352; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 353; CHECK-NEXT: ret 354; 355; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8: 356; CHECK-ZVBB: # %bb.0: 357; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 358; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 359; CHECK-ZVBB-NEXT: ret 360 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl) 361 ret <vscale x 32 x i8> %v 362} 363 364define <vscale x 32 x i8> @vp_cttz_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { 365; CHECK-LABEL: vp_cttz_nxv32i8_unmasked: 366; CHECK: # %bb.0: 367; CHECK-NEXT: li a1, 1 368; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 369; CHECK-NEXT: vnot.v v12, v8 370; CHECK-NEXT: vsub.vx v8, v8, a1 371; CHECK-NEXT: li a0, 85 372; CHECK-NEXT: vand.vv v8, v12, v8 373; CHECK-NEXT: vsrl.vi v12, v8, 1 374; CHECK-NEXT: vand.vx v12, v12, a0 375; CHECK-NEXT: li a0, 51 376; CHECK-NEXT: vsub.vv v8, v8, v12 377; CHECK-NEXT: vand.vx v12, v8, a0 378; CHECK-NEXT: vsrl.vi v8, v8, 2 379; CHECK-NEXT: vand.vx v8, v8, a0 380; CHECK-NEXT: vadd.vv v8, v12, v8 381; CHECK-NEXT: vsrl.vi v12, v8, 4 382; CHECK-NEXT: vadd.vv v8, v8, v12 383; CHECK-NEXT: vand.vi v8, v8, 15 384; CHECK-NEXT: ret 385; 386; CHECK-ZVBB-LABEL: vp_cttz_nxv32i8_unmasked: 387; CHECK-ZVBB: # %bb.0: 388; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 389; CHECK-ZVBB-NEXT: vctz.v v8, v8 390; CHECK-ZVBB-NEXT: ret 391 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 false, <vscale x 32 x i1> splat (i1 true), i32 %evl) 392 ret <vscale x 32 x i8> %v 393} 394 395declare <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8>, i1 immarg, <vscale x 64 x i1>, i32) 396 397define <vscale x 64 x i8> @vp_cttz_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { 398; CHECK-LABEL: vp_cttz_nxv64i8: 399; CHECK: # %bb.0: 400; CHECK-NEXT: li a1, 1 401; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 402; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t 403; CHECK-NEXT: li a0, 85 404; CHECK-NEXT: vnot.v v8, v8, v0.t 405; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 406; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 407; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 408; CHECK-NEXT: li a0, 51 409; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t 410; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 411; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 412; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 413; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t 414; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 415; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 416; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 417; CHECK-NEXT: ret 418; 419; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8: 420; CHECK-ZVBB: # %bb.0: 421; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 422; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 423; CHECK-ZVBB-NEXT: ret 424 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> %m, i32 %evl) 425 ret <vscale x 64 x i8> %v 426} 427 428define <vscale x 64 x i8> @vp_cttz_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { 429; CHECK-LABEL: vp_cttz_nxv64i8_unmasked: 430; CHECK: # %bb.0: 431; CHECK-NEXT: li a1, 1 432; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 433; CHECK-NEXT: vnot.v v16, v8 434; CHECK-NEXT: vsub.vx v8, v8, a1 435; CHECK-NEXT: li a0, 85 436; CHECK-NEXT: vand.vv v8, v16, v8 437; CHECK-NEXT: vsrl.vi v16, v8, 1 438; CHECK-NEXT: vand.vx v16, v16, a0 439; CHECK-NEXT: li a0, 51 440; CHECK-NEXT: vsub.vv v8, v8, v16 441; CHECK-NEXT: vand.vx v16, v8, a0 442; CHECK-NEXT: vsrl.vi v8, v8, 2 443; CHECK-NEXT: vand.vx v8, v8, a0 444; CHECK-NEXT: vadd.vv v8, v16, v8 445; CHECK-NEXT: vsrl.vi v16, v8, 4 446; CHECK-NEXT: vadd.vv v8, v8, v16 447; CHECK-NEXT: vand.vi v8, v8, 15 448; CHECK-NEXT: ret 449; 450; CHECK-ZVBB-LABEL: vp_cttz_nxv64i8_unmasked: 451; CHECK-ZVBB: # %bb.0: 452; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 453; CHECK-ZVBB-NEXT: vctz.v v8, v8 454; CHECK-ZVBB-NEXT: ret 455 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 false, <vscale x 64 x i1> splat (i1 true), i32 %evl) 456 ret <vscale x 64 x i8> %v 457} 458 459declare <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16>, i1 immarg, <vscale x 1 x i1>, i32) 460 461define <vscale x 1 x i16> @vp_cttz_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 462; CHECK-LABEL: vp_cttz_nxv1i16: 463; CHECK: # %bb.0: 464; CHECK-NEXT: li a1, 1 465; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 466; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 467; CHECK-NEXT: lui a0, 5 468; CHECK-NEXT: vnot.v v8, v8, v0.t 469; CHECK-NEXT: addi a0, a0, 1365 470; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 471; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 472; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 473; CHECK-NEXT: lui a0, 3 474; CHECK-NEXT: addi a0, a0, 819 475; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 476; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 477; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 478; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 479; CHECK-NEXT: lui a0, 1 480; CHECK-NEXT: addi a0, a0, -241 481; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 482; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 483; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 484; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 485; CHECK-NEXT: li a0, 257 486; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 487; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 488; CHECK-NEXT: ret 489; 490; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16: 491; CHECK-ZVBB: # %bb.0: 492; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 493; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 494; CHECK-ZVBB-NEXT: ret 495 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl) 496 ret <vscale x 1 x i16> %v 497} 498 499define <vscale x 1 x i16> @vp_cttz_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { 500; CHECK-LABEL: vp_cttz_nxv1i16_unmasked: 501; CHECK: # %bb.0: 502; CHECK-NEXT: li a1, 1 503; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 504; CHECK-NEXT: vnot.v v9, v8 505; CHECK-NEXT: vsub.vx v8, v8, a1 506; CHECK-NEXT: lui a0, 5 507; CHECK-NEXT: addi a0, a0, 1365 508; CHECK-NEXT: vand.vv v8, v9, v8 509; CHECK-NEXT: vsrl.vi v9, v8, 1 510; CHECK-NEXT: vand.vx v9, v9, a0 511; CHECK-NEXT: lui a0, 3 512; CHECK-NEXT: addi a0, a0, 819 513; CHECK-NEXT: vsub.vv v8, v8, v9 514; CHECK-NEXT: vand.vx v9, v8, a0 515; CHECK-NEXT: vsrl.vi v8, v8, 2 516; CHECK-NEXT: vand.vx v8, v8, a0 517; CHECK-NEXT: lui a0, 1 518; CHECK-NEXT: addi a0, a0, -241 519; CHECK-NEXT: vadd.vv v8, v9, v8 520; CHECK-NEXT: vsrl.vi v9, v8, 4 521; CHECK-NEXT: vadd.vv v8, v8, v9 522; CHECK-NEXT: vand.vx v8, v8, a0 523; CHECK-NEXT: li a0, 257 524; CHECK-NEXT: vmul.vx v8, v8, a0 525; CHECK-NEXT: vsrl.vi v8, v8, 8 526; CHECK-NEXT: ret 527; 528; CHECK-ZVBB-LABEL: vp_cttz_nxv1i16_unmasked: 529; CHECK-ZVBB: # %bb.0: 530; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 531; CHECK-ZVBB-NEXT: vctz.v v8, v8 532; CHECK-ZVBB-NEXT: ret 533 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl) 534 ret <vscale x 1 x i16> %v 535} 536 537declare <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16>, i1 immarg, <vscale x 2 x i1>, i32) 538 539define <vscale x 2 x i16> @vp_cttz_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 540; CHECK-LABEL: vp_cttz_nxv2i16: 541; CHECK: # %bb.0: 542; CHECK-NEXT: li a1, 1 543; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 544; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 545; CHECK-NEXT: lui a0, 5 546; CHECK-NEXT: vnot.v v8, v8, v0.t 547; CHECK-NEXT: addi a0, a0, 1365 548; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 549; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 550; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 551; CHECK-NEXT: lui a0, 3 552; CHECK-NEXT: addi a0, a0, 819 553; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 554; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 555; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 556; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 557; CHECK-NEXT: lui a0, 1 558; CHECK-NEXT: addi a0, a0, -241 559; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 560; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 561; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 562; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 563; CHECK-NEXT: li a0, 257 564; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 565; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 566; CHECK-NEXT: ret 567; 568; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16: 569; CHECK-ZVBB: # %bb.0: 570; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 571; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 572; CHECK-ZVBB-NEXT: ret 573 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl) 574 ret <vscale x 2 x i16> %v 575} 576 577define <vscale x 2 x i16> @vp_cttz_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { 578; CHECK-LABEL: vp_cttz_nxv2i16_unmasked: 579; CHECK: # %bb.0: 580; CHECK-NEXT: li a1, 1 581; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 582; CHECK-NEXT: vnot.v v9, v8 583; CHECK-NEXT: vsub.vx v8, v8, a1 584; CHECK-NEXT: lui a0, 5 585; CHECK-NEXT: addi a0, a0, 1365 586; CHECK-NEXT: vand.vv v8, v9, v8 587; CHECK-NEXT: vsrl.vi v9, v8, 1 588; CHECK-NEXT: vand.vx v9, v9, a0 589; CHECK-NEXT: lui a0, 3 590; CHECK-NEXT: addi a0, a0, 819 591; CHECK-NEXT: vsub.vv v8, v8, v9 592; CHECK-NEXT: vand.vx v9, v8, a0 593; CHECK-NEXT: vsrl.vi v8, v8, 2 594; CHECK-NEXT: vand.vx v8, v8, a0 595; CHECK-NEXT: lui a0, 1 596; CHECK-NEXT: addi a0, a0, -241 597; CHECK-NEXT: vadd.vv v8, v9, v8 598; CHECK-NEXT: vsrl.vi v9, v8, 4 599; CHECK-NEXT: vadd.vv v8, v8, v9 600; CHECK-NEXT: vand.vx v8, v8, a0 601; CHECK-NEXT: li a0, 257 602; CHECK-NEXT: vmul.vx v8, v8, a0 603; CHECK-NEXT: vsrl.vi v8, v8, 8 604; CHECK-NEXT: ret 605; 606; CHECK-ZVBB-LABEL: vp_cttz_nxv2i16_unmasked: 607; CHECK-ZVBB: # %bb.0: 608; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 609; CHECK-ZVBB-NEXT: vctz.v v8, v8 610; CHECK-ZVBB-NEXT: ret 611 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl) 612 ret <vscale x 2 x i16> %v 613} 614 615declare <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16>, i1 immarg, <vscale x 4 x i1>, i32) 616 617define <vscale x 4 x i16> @vp_cttz_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 618; CHECK-LABEL: vp_cttz_nxv4i16: 619; CHECK: # %bb.0: 620; CHECK-NEXT: li a1, 1 621; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 622; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 623; CHECK-NEXT: lui a0, 5 624; CHECK-NEXT: vnot.v v8, v8, v0.t 625; CHECK-NEXT: addi a0, a0, 1365 626; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 627; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 628; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 629; CHECK-NEXT: lui a0, 3 630; CHECK-NEXT: addi a0, a0, 819 631; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 632; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 633; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 634; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 635; CHECK-NEXT: lui a0, 1 636; CHECK-NEXT: addi a0, a0, -241 637; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 638; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 639; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 640; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 641; CHECK-NEXT: li a0, 257 642; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 643; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 644; CHECK-NEXT: ret 645; 646; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16: 647; CHECK-ZVBB: # %bb.0: 648; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 649; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 650; CHECK-ZVBB-NEXT: ret 651 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl) 652 ret <vscale x 4 x i16> %v 653} 654 655define <vscale x 4 x i16> @vp_cttz_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { 656; CHECK-LABEL: vp_cttz_nxv4i16_unmasked: 657; CHECK: # %bb.0: 658; CHECK-NEXT: li a1, 1 659; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 660; CHECK-NEXT: vnot.v v9, v8 661; CHECK-NEXT: vsub.vx v8, v8, a1 662; CHECK-NEXT: lui a0, 5 663; CHECK-NEXT: addi a0, a0, 1365 664; CHECK-NEXT: vand.vv v8, v9, v8 665; CHECK-NEXT: vsrl.vi v9, v8, 1 666; CHECK-NEXT: vand.vx v9, v9, a0 667; CHECK-NEXT: lui a0, 3 668; CHECK-NEXT: addi a0, a0, 819 669; CHECK-NEXT: vsub.vv v8, v8, v9 670; CHECK-NEXT: vand.vx v9, v8, a0 671; CHECK-NEXT: vsrl.vi v8, v8, 2 672; CHECK-NEXT: vand.vx v8, v8, a0 673; CHECK-NEXT: lui a0, 1 674; CHECK-NEXT: addi a0, a0, -241 675; CHECK-NEXT: vadd.vv v8, v9, v8 676; CHECK-NEXT: vsrl.vi v9, v8, 4 677; CHECK-NEXT: vadd.vv v8, v8, v9 678; CHECK-NEXT: vand.vx v8, v8, a0 679; CHECK-NEXT: li a0, 257 680; CHECK-NEXT: vmul.vx v8, v8, a0 681; CHECK-NEXT: vsrl.vi v8, v8, 8 682; CHECK-NEXT: ret 683; 684; CHECK-ZVBB-LABEL: vp_cttz_nxv4i16_unmasked: 685; CHECK-ZVBB: # %bb.0: 686; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 687; CHECK-ZVBB-NEXT: vctz.v v8, v8 688; CHECK-ZVBB-NEXT: ret 689 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl) 690 ret <vscale x 4 x i16> %v 691} 692 693declare <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16>, i1 immarg, <vscale x 8 x i1>, i32) 694 695define <vscale x 8 x i16> @vp_cttz_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 696; CHECK-LABEL: vp_cttz_nxv8i16: 697; CHECK: # %bb.0: 698; CHECK-NEXT: li a1, 1 699; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 700; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 701; CHECK-NEXT: lui a0, 5 702; CHECK-NEXT: vnot.v v8, v8, v0.t 703; CHECK-NEXT: addi a0, a0, 1365 704; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 705; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 706; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 707; CHECK-NEXT: lui a0, 3 708; CHECK-NEXT: addi a0, a0, 819 709; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 710; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 711; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 712; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 713; CHECK-NEXT: lui a0, 1 714; CHECK-NEXT: addi a0, a0, -241 715; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 716; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 717; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 718; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 719; CHECK-NEXT: li a0, 257 720; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 721; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 722; CHECK-NEXT: ret 723; 724; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16: 725; CHECK-ZVBB: # %bb.0: 726; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 727; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 728; CHECK-ZVBB-NEXT: ret 729 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl) 730 ret <vscale x 8 x i16> %v 731} 732 733define <vscale x 8 x i16> @vp_cttz_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { 734; CHECK-LABEL: vp_cttz_nxv8i16_unmasked: 735; CHECK: # %bb.0: 736; CHECK-NEXT: li a1, 1 737; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 738; CHECK-NEXT: vnot.v v10, v8 739; CHECK-NEXT: vsub.vx v8, v8, a1 740; CHECK-NEXT: lui a0, 5 741; CHECK-NEXT: addi a0, a0, 1365 742; CHECK-NEXT: vand.vv v8, v10, v8 743; CHECK-NEXT: vsrl.vi v10, v8, 1 744; CHECK-NEXT: vand.vx v10, v10, a0 745; CHECK-NEXT: lui a0, 3 746; CHECK-NEXT: addi a0, a0, 819 747; CHECK-NEXT: vsub.vv v8, v8, v10 748; CHECK-NEXT: vand.vx v10, v8, a0 749; CHECK-NEXT: vsrl.vi v8, v8, 2 750; CHECK-NEXT: vand.vx v8, v8, a0 751; CHECK-NEXT: lui a0, 1 752; CHECK-NEXT: addi a0, a0, -241 753; CHECK-NEXT: vadd.vv v8, v10, v8 754; CHECK-NEXT: vsrl.vi v10, v8, 4 755; CHECK-NEXT: vadd.vv v8, v8, v10 756; CHECK-NEXT: vand.vx v8, v8, a0 757; CHECK-NEXT: li a0, 257 758; CHECK-NEXT: vmul.vx v8, v8, a0 759; CHECK-NEXT: vsrl.vi v8, v8, 8 760; CHECK-NEXT: ret 761; 762; CHECK-ZVBB-LABEL: vp_cttz_nxv8i16_unmasked: 763; CHECK-ZVBB: # %bb.0: 764; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 765; CHECK-ZVBB-NEXT: vctz.v v8, v8 766; CHECK-ZVBB-NEXT: ret 767 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl) 768 ret <vscale x 8 x i16> %v 769} 770 771declare <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16>, i1 immarg, <vscale x 16 x i1>, i32) 772 773define <vscale x 16 x i16> @vp_cttz_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 774; CHECK-LABEL: vp_cttz_nxv16i16: 775; CHECK: # %bb.0: 776; CHECK-NEXT: li a1, 1 777; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 778; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 779; CHECK-NEXT: lui a0, 5 780; CHECK-NEXT: vnot.v v8, v8, v0.t 781; CHECK-NEXT: addi a0, a0, 1365 782; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 783; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 784; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 785; CHECK-NEXT: lui a0, 3 786; CHECK-NEXT: addi a0, a0, 819 787; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 788; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 789; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 790; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 791; CHECK-NEXT: lui a0, 1 792; CHECK-NEXT: addi a0, a0, -241 793; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 794; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 795; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 796; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 797; CHECK-NEXT: li a0, 257 798; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 799; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 800; CHECK-NEXT: ret 801; 802; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16: 803; CHECK-ZVBB: # %bb.0: 804; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 805; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 806; CHECK-ZVBB-NEXT: ret 807 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl) 808 ret <vscale x 16 x i16> %v 809} 810 811define <vscale x 16 x i16> @vp_cttz_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { 812; CHECK-LABEL: vp_cttz_nxv16i16_unmasked: 813; CHECK: # %bb.0: 814; CHECK-NEXT: li a1, 1 815; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 816; CHECK-NEXT: vnot.v v12, v8 817; CHECK-NEXT: vsub.vx v8, v8, a1 818; CHECK-NEXT: lui a0, 5 819; CHECK-NEXT: addi a0, a0, 1365 820; CHECK-NEXT: vand.vv v8, v12, v8 821; CHECK-NEXT: vsrl.vi v12, v8, 1 822; CHECK-NEXT: vand.vx v12, v12, a0 823; CHECK-NEXT: lui a0, 3 824; CHECK-NEXT: addi a0, a0, 819 825; CHECK-NEXT: vsub.vv v8, v8, v12 826; CHECK-NEXT: vand.vx v12, v8, a0 827; CHECK-NEXT: vsrl.vi v8, v8, 2 828; CHECK-NEXT: vand.vx v8, v8, a0 829; CHECK-NEXT: lui a0, 1 830; CHECK-NEXT: addi a0, a0, -241 831; CHECK-NEXT: vadd.vv v8, v12, v8 832; CHECK-NEXT: vsrl.vi v12, v8, 4 833; CHECK-NEXT: vadd.vv v8, v8, v12 834; CHECK-NEXT: vand.vx v8, v8, a0 835; CHECK-NEXT: li a0, 257 836; CHECK-NEXT: vmul.vx v8, v8, a0 837; CHECK-NEXT: vsrl.vi v8, v8, 8 838; CHECK-NEXT: ret 839; 840; CHECK-ZVBB-LABEL: vp_cttz_nxv16i16_unmasked: 841; CHECK-ZVBB: # %bb.0: 842; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 843; CHECK-ZVBB-NEXT: vctz.v v8, v8 844; CHECK-ZVBB-NEXT: ret 845 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl) 846 ret <vscale x 16 x i16> %v 847} 848 849declare <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16>, i1 immarg, <vscale x 32 x i1>, i32) 850 851define <vscale x 32 x i16> @vp_cttz_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 852; CHECK-LABEL: vp_cttz_nxv32i16: 853; CHECK: # %bb.0: 854; CHECK-NEXT: li a1, 1 855; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 856; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t 857; CHECK-NEXT: lui a0, 5 858; CHECK-NEXT: vnot.v v8, v8, v0.t 859; CHECK-NEXT: addi a0, a0, 1365 860; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 861; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 862; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 863; CHECK-NEXT: lui a0, 3 864; CHECK-NEXT: addi a0, a0, 819 865; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t 866; CHECK-NEXT: vand.vx v8, v16, a0, v0.t 867; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t 868; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 869; CHECK-NEXT: lui a0, 1 870; CHECK-NEXT: addi a0, a0, -241 871; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 872; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 873; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 874; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 875; CHECK-NEXT: li a0, 257 876; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 877; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 878; CHECK-NEXT: ret 879; 880; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16: 881; CHECK-ZVBB: # %bb.0: 882; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 883; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 884; CHECK-ZVBB-NEXT: ret 885 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> %m, i32 %evl) 886 ret <vscale x 32 x i16> %v 887} 888 889define <vscale x 32 x i16> @vp_cttz_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { 890; CHECK-LABEL: vp_cttz_nxv32i16_unmasked: 891; CHECK: # %bb.0: 892; CHECK-NEXT: li a1, 1 893; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 894; CHECK-NEXT: vnot.v v16, v8 895; CHECK-NEXT: vsub.vx v8, v8, a1 896; CHECK-NEXT: lui a0, 5 897; CHECK-NEXT: addi a0, a0, 1365 898; CHECK-NEXT: vand.vv v8, v16, v8 899; CHECK-NEXT: vsrl.vi v16, v8, 1 900; CHECK-NEXT: vand.vx v16, v16, a0 901; CHECK-NEXT: lui a0, 3 902; CHECK-NEXT: addi a0, a0, 819 903; CHECK-NEXT: vsub.vv v8, v8, v16 904; CHECK-NEXT: vand.vx v16, v8, a0 905; CHECK-NEXT: vsrl.vi v8, v8, 2 906; CHECK-NEXT: vand.vx v8, v8, a0 907; CHECK-NEXT: lui a0, 1 908; CHECK-NEXT: addi a0, a0, -241 909; CHECK-NEXT: vadd.vv v8, v16, v8 910; CHECK-NEXT: vsrl.vi v16, v8, 4 911; CHECK-NEXT: vadd.vv v8, v8, v16 912; CHECK-NEXT: vand.vx v8, v8, a0 913; CHECK-NEXT: li a0, 257 914; CHECK-NEXT: vmul.vx v8, v8, a0 915; CHECK-NEXT: vsrl.vi v8, v8, 8 916; CHECK-NEXT: ret 917; 918; CHECK-ZVBB-LABEL: vp_cttz_nxv32i16_unmasked: 919; CHECK-ZVBB: # %bb.0: 920; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 921; CHECK-ZVBB-NEXT: vctz.v v8, v8 922; CHECK-ZVBB-NEXT: ret 923 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 false, <vscale x 32 x i1> splat (i1 true), i32 %evl) 924 ret <vscale x 32 x i16> %v 925} 926 927declare <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32>, i1 immarg, <vscale x 1 x i1>, i32) 928 929define <vscale x 1 x i32> @vp_cttz_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 930; CHECK-LABEL: vp_cttz_nxv1i32: 931; CHECK: # %bb.0: 932; CHECK-NEXT: li a1, 1 933; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 934; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 935; CHECK-NEXT: lui a0, 349525 936; CHECK-NEXT: vnot.v v8, v8, v0.t 937; CHECK-NEXT: addi a0, a0, 1365 938; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 939; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 940; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 941; CHECK-NEXT: lui a0, 209715 942; CHECK-NEXT: addi a0, a0, 819 943; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 944; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 945; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 946; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 947; CHECK-NEXT: lui a0, 61681 948; CHECK-NEXT: addi a0, a0, -241 949; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 950; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 951; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 952; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 953; CHECK-NEXT: lui a0, 4112 954; CHECK-NEXT: addi a0, a0, 257 955; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 956; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 957; CHECK-NEXT: ret 958; 959; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32: 960; CHECK-ZVBB: # %bb.0: 961; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 962; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 963; CHECK-ZVBB-NEXT: ret 964 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl) 965 ret <vscale x 1 x i32> %v 966} 967 968define <vscale x 1 x i32> @vp_cttz_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { 969; CHECK-LABEL: vp_cttz_nxv1i32_unmasked: 970; CHECK: # %bb.0: 971; CHECK-NEXT: li a1, 1 972; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 973; CHECK-NEXT: vnot.v v9, v8 974; CHECK-NEXT: vsub.vx v8, v8, a1 975; CHECK-NEXT: lui a0, 349525 976; CHECK-NEXT: addi a0, a0, 1365 977; CHECK-NEXT: vand.vv v8, v9, v8 978; CHECK-NEXT: vsrl.vi v9, v8, 1 979; CHECK-NEXT: vand.vx v9, v9, a0 980; CHECK-NEXT: lui a0, 209715 981; CHECK-NEXT: addi a0, a0, 819 982; CHECK-NEXT: vsub.vv v8, v8, v9 983; CHECK-NEXT: vand.vx v9, v8, a0 984; CHECK-NEXT: vsrl.vi v8, v8, 2 985; CHECK-NEXT: vand.vx v8, v8, a0 986; CHECK-NEXT: lui a0, 61681 987; CHECK-NEXT: addi a0, a0, -241 988; CHECK-NEXT: vadd.vv v8, v9, v8 989; CHECK-NEXT: vsrl.vi v9, v8, 4 990; CHECK-NEXT: vadd.vv v8, v8, v9 991; CHECK-NEXT: vand.vx v8, v8, a0 992; CHECK-NEXT: lui a0, 4112 993; CHECK-NEXT: addi a0, a0, 257 994; CHECK-NEXT: vmul.vx v8, v8, a0 995; CHECK-NEXT: vsrl.vi v8, v8, 24 996; CHECK-NEXT: ret 997; 998; CHECK-ZVBB-LABEL: vp_cttz_nxv1i32_unmasked: 999; CHECK-ZVBB: # %bb.0: 1000; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 1001; CHECK-ZVBB-NEXT: vctz.v v8, v8 1002; CHECK-ZVBB-NEXT: ret 1003 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1004 ret <vscale x 1 x i32> %v 1005} 1006 1007declare <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32>, i1 immarg, <vscale x 2 x i1>, i32) 1008 1009define <vscale x 2 x i32> @vp_cttz_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1010; CHECK-LABEL: vp_cttz_nxv2i32: 1011; CHECK: # %bb.0: 1012; CHECK-NEXT: li a1, 1 1013; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1014; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 1015; CHECK-NEXT: lui a0, 349525 1016; CHECK-NEXT: vnot.v v8, v8, v0.t 1017; CHECK-NEXT: addi a0, a0, 1365 1018; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 1019; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 1020; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 1021; CHECK-NEXT: lui a0, 209715 1022; CHECK-NEXT: addi a0, a0, 819 1023; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 1024; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 1025; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 1026; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1027; CHECK-NEXT: lui a0, 61681 1028; CHECK-NEXT: addi a0, a0, -241 1029; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 1030; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 1031; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 1032; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1033; CHECK-NEXT: lui a0, 4112 1034; CHECK-NEXT: addi a0, a0, 257 1035; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1036; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1037; CHECK-NEXT: ret 1038; 1039; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32: 1040; CHECK-ZVBB: # %bb.0: 1041; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1042; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1043; CHECK-ZVBB-NEXT: ret 1044 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl) 1045 ret <vscale x 2 x i32> %v 1046} 1047 1048define <vscale x 2 x i32> @vp_cttz_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { 1049; CHECK-LABEL: vp_cttz_nxv2i32_unmasked: 1050; CHECK: # %bb.0: 1051; CHECK-NEXT: li a1, 1 1052; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1053; CHECK-NEXT: vnot.v v9, v8 1054; CHECK-NEXT: vsub.vx v8, v8, a1 1055; CHECK-NEXT: lui a0, 349525 1056; CHECK-NEXT: addi a0, a0, 1365 1057; CHECK-NEXT: vand.vv v8, v9, v8 1058; CHECK-NEXT: vsrl.vi v9, v8, 1 1059; CHECK-NEXT: vand.vx v9, v9, a0 1060; CHECK-NEXT: lui a0, 209715 1061; CHECK-NEXT: addi a0, a0, 819 1062; CHECK-NEXT: vsub.vv v8, v8, v9 1063; CHECK-NEXT: vand.vx v9, v8, a0 1064; CHECK-NEXT: vsrl.vi v8, v8, 2 1065; CHECK-NEXT: vand.vx v8, v8, a0 1066; CHECK-NEXT: lui a0, 61681 1067; CHECK-NEXT: addi a0, a0, -241 1068; CHECK-NEXT: vadd.vv v8, v9, v8 1069; CHECK-NEXT: vsrl.vi v9, v8, 4 1070; CHECK-NEXT: vadd.vv v8, v8, v9 1071; CHECK-NEXT: vand.vx v8, v8, a0 1072; CHECK-NEXT: lui a0, 4112 1073; CHECK-NEXT: addi a0, a0, 257 1074; CHECK-NEXT: vmul.vx v8, v8, a0 1075; CHECK-NEXT: vsrl.vi v8, v8, 24 1076; CHECK-NEXT: ret 1077; 1078; CHECK-ZVBB-LABEL: vp_cttz_nxv2i32_unmasked: 1079; CHECK-ZVBB: # %bb.0: 1080; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 1081; CHECK-ZVBB-NEXT: vctz.v v8, v8 1082; CHECK-ZVBB-NEXT: ret 1083 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1084 ret <vscale x 2 x i32> %v 1085} 1086 1087declare <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32>, i1 immarg, <vscale x 4 x i1>, i32) 1088 1089define <vscale x 4 x i32> @vp_cttz_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1090; CHECK-LABEL: vp_cttz_nxv4i32: 1091; CHECK: # %bb.0: 1092; CHECK-NEXT: li a1, 1 1093; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1094; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 1095; CHECK-NEXT: lui a0, 349525 1096; CHECK-NEXT: vnot.v v8, v8, v0.t 1097; CHECK-NEXT: addi a0, a0, 1365 1098; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 1099; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 1100; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 1101; CHECK-NEXT: lui a0, 209715 1102; CHECK-NEXT: addi a0, a0, 819 1103; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 1104; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 1105; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 1106; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1107; CHECK-NEXT: lui a0, 61681 1108; CHECK-NEXT: addi a0, a0, -241 1109; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 1110; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 1111; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 1112; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1113; CHECK-NEXT: lui a0, 4112 1114; CHECK-NEXT: addi a0, a0, 257 1115; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1116; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1117; CHECK-NEXT: ret 1118; 1119; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32: 1120; CHECK-ZVBB: # %bb.0: 1121; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1122; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1123; CHECK-ZVBB-NEXT: ret 1124 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl) 1125 ret <vscale x 4 x i32> %v 1126} 1127 1128define <vscale x 4 x i32> @vp_cttz_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { 1129; CHECK-LABEL: vp_cttz_nxv4i32_unmasked: 1130; CHECK: # %bb.0: 1131; CHECK-NEXT: li a1, 1 1132; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1133; CHECK-NEXT: vnot.v v10, v8 1134; CHECK-NEXT: vsub.vx v8, v8, a1 1135; CHECK-NEXT: lui a0, 349525 1136; CHECK-NEXT: addi a0, a0, 1365 1137; CHECK-NEXT: vand.vv v8, v10, v8 1138; CHECK-NEXT: vsrl.vi v10, v8, 1 1139; CHECK-NEXT: vand.vx v10, v10, a0 1140; CHECK-NEXT: lui a0, 209715 1141; CHECK-NEXT: addi a0, a0, 819 1142; CHECK-NEXT: vsub.vv v8, v8, v10 1143; CHECK-NEXT: vand.vx v10, v8, a0 1144; CHECK-NEXT: vsrl.vi v8, v8, 2 1145; CHECK-NEXT: vand.vx v8, v8, a0 1146; CHECK-NEXT: lui a0, 61681 1147; CHECK-NEXT: addi a0, a0, -241 1148; CHECK-NEXT: vadd.vv v8, v10, v8 1149; CHECK-NEXT: vsrl.vi v10, v8, 4 1150; CHECK-NEXT: vadd.vv v8, v8, v10 1151; CHECK-NEXT: vand.vx v8, v8, a0 1152; CHECK-NEXT: lui a0, 4112 1153; CHECK-NEXT: addi a0, a0, 257 1154; CHECK-NEXT: vmul.vx v8, v8, a0 1155; CHECK-NEXT: vsrl.vi v8, v8, 24 1156; CHECK-NEXT: ret 1157; 1158; CHECK-ZVBB-LABEL: vp_cttz_nxv4i32_unmasked: 1159; CHECK-ZVBB: # %bb.0: 1160; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1161; CHECK-ZVBB-NEXT: vctz.v v8, v8 1162; CHECK-ZVBB-NEXT: ret 1163 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1164 ret <vscale x 4 x i32> %v 1165} 1166 1167declare <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32>, i1 immarg, <vscale x 8 x i1>, i32) 1168 1169define <vscale x 8 x i32> @vp_cttz_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1170; CHECK-LABEL: vp_cttz_nxv8i32: 1171; CHECK: # %bb.0: 1172; CHECK-NEXT: li a1, 1 1173; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1174; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 1175; CHECK-NEXT: lui a0, 349525 1176; CHECK-NEXT: vnot.v v8, v8, v0.t 1177; CHECK-NEXT: addi a0, a0, 1365 1178; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 1179; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 1180; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1181; CHECK-NEXT: lui a0, 209715 1182; CHECK-NEXT: addi a0, a0, 819 1183; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 1184; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 1185; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 1186; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1187; CHECK-NEXT: lui a0, 61681 1188; CHECK-NEXT: addi a0, a0, -241 1189; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 1190; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 1191; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 1192; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1193; CHECK-NEXT: lui a0, 4112 1194; CHECK-NEXT: addi a0, a0, 257 1195; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1196; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1197; CHECK-NEXT: ret 1198; 1199; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32: 1200; CHECK-ZVBB: # %bb.0: 1201; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1202; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1203; CHECK-ZVBB-NEXT: ret 1204 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl) 1205 ret <vscale x 8 x i32> %v 1206} 1207 1208define <vscale x 8 x i32> @vp_cttz_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { 1209; CHECK-LABEL: vp_cttz_nxv8i32_unmasked: 1210; CHECK: # %bb.0: 1211; CHECK-NEXT: li a1, 1 1212; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1213; CHECK-NEXT: vnot.v v12, v8 1214; CHECK-NEXT: vsub.vx v8, v8, a1 1215; CHECK-NEXT: lui a0, 349525 1216; CHECK-NEXT: addi a0, a0, 1365 1217; CHECK-NEXT: vand.vv v8, v12, v8 1218; CHECK-NEXT: vsrl.vi v12, v8, 1 1219; CHECK-NEXT: vand.vx v12, v12, a0 1220; CHECK-NEXT: lui a0, 209715 1221; CHECK-NEXT: addi a0, a0, 819 1222; CHECK-NEXT: vsub.vv v8, v8, v12 1223; CHECK-NEXT: vand.vx v12, v8, a0 1224; CHECK-NEXT: vsrl.vi v8, v8, 2 1225; CHECK-NEXT: vand.vx v8, v8, a0 1226; CHECK-NEXT: lui a0, 61681 1227; CHECK-NEXT: addi a0, a0, -241 1228; CHECK-NEXT: vadd.vv v8, v12, v8 1229; CHECK-NEXT: vsrl.vi v12, v8, 4 1230; CHECK-NEXT: vadd.vv v8, v8, v12 1231; CHECK-NEXT: vand.vx v8, v8, a0 1232; CHECK-NEXT: lui a0, 4112 1233; CHECK-NEXT: addi a0, a0, 257 1234; CHECK-NEXT: vmul.vx v8, v8, a0 1235; CHECK-NEXT: vsrl.vi v8, v8, 24 1236; CHECK-NEXT: ret 1237; 1238; CHECK-ZVBB-LABEL: vp_cttz_nxv8i32_unmasked: 1239; CHECK-ZVBB: # %bb.0: 1240; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1241; CHECK-ZVBB-NEXT: vctz.v v8, v8 1242; CHECK-ZVBB-NEXT: ret 1243 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1244 ret <vscale x 8 x i32> %v 1245} 1246 1247declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32) 1248 1249define <vscale x 16 x i32> @vp_cttz_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1250; CHECK-LABEL: vp_cttz_nxv16i32: 1251; CHECK: # %bb.0: 1252; CHECK-NEXT: li a1, 1 1253; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1254; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t 1255; CHECK-NEXT: lui a0, 349525 1256; CHECK-NEXT: vnot.v v8, v8, v0.t 1257; CHECK-NEXT: addi a0, a0, 1365 1258; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 1259; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 1260; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1261; CHECK-NEXT: lui a0, 209715 1262; CHECK-NEXT: addi a0, a0, 819 1263; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t 1264; CHECK-NEXT: vand.vx v8, v16, a0, v0.t 1265; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t 1266; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1267; CHECK-NEXT: lui a0, 61681 1268; CHECK-NEXT: addi a0, a0, -241 1269; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 1270; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 1271; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 1272; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1273; CHECK-NEXT: lui a0, 4112 1274; CHECK-NEXT: addi a0, a0, 257 1275; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1276; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1277; CHECK-NEXT: ret 1278; 1279; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32: 1280; CHECK-ZVBB: # %bb.0: 1281; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1282; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1283; CHECK-ZVBB-NEXT: ret 1284 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl) 1285 ret <vscale x 16 x i32> %v 1286} 1287 1288define <vscale x 16 x i32> @vp_cttz_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { 1289; CHECK-LABEL: vp_cttz_nxv16i32_unmasked: 1290; CHECK: # %bb.0: 1291; CHECK-NEXT: li a1, 1 1292; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1293; CHECK-NEXT: vnot.v v16, v8 1294; CHECK-NEXT: vsub.vx v8, v8, a1 1295; CHECK-NEXT: lui a0, 349525 1296; CHECK-NEXT: addi a0, a0, 1365 1297; CHECK-NEXT: vand.vv v8, v16, v8 1298; CHECK-NEXT: vsrl.vi v16, v8, 1 1299; CHECK-NEXT: vand.vx v16, v16, a0 1300; CHECK-NEXT: lui a0, 209715 1301; CHECK-NEXT: addi a0, a0, 819 1302; CHECK-NEXT: vsub.vv v8, v8, v16 1303; CHECK-NEXT: vand.vx v16, v8, a0 1304; CHECK-NEXT: vsrl.vi v8, v8, 2 1305; CHECK-NEXT: vand.vx v8, v8, a0 1306; CHECK-NEXT: lui a0, 61681 1307; CHECK-NEXT: addi a0, a0, -241 1308; CHECK-NEXT: vadd.vv v8, v16, v8 1309; CHECK-NEXT: vsrl.vi v16, v8, 4 1310; CHECK-NEXT: vadd.vv v8, v8, v16 1311; CHECK-NEXT: vand.vx v8, v8, a0 1312; CHECK-NEXT: lui a0, 4112 1313; CHECK-NEXT: addi a0, a0, 257 1314; CHECK-NEXT: vmul.vx v8, v8, a0 1315; CHECK-NEXT: vsrl.vi v8, v8, 24 1316; CHECK-NEXT: ret 1317; 1318; CHECK-ZVBB-LABEL: vp_cttz_nxv16i32_unmasked: 1319; CHECK-ZVBB: # %bb.0: 1320; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1321; CHECK-ZVBB-NEXT: vctz.v v8, v8 1322; CHECK-ZVBB-NEXT: ret 1323 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1324 ret <vscale x 16 x i32> %v 1325} 1326 1327declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg, <vscale x 1 x i1>, i32) 1328 1329define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1330; RV32-LABEL: vp_cttz_nxv1i64: 1331; RV32: # %bb.0: 1332; RV32-NEXT: li a1, 1 1333; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1334; RV32-NEXT: vsub.vx v9, v8, a1, v0.t 1335; RV32-NEXT: lui a1, 349525 1336; RV32-NEXT: addi a1, a1, 1365 1337; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1338; RV32-NEXT: vmv.v.x v10, a1 1339; RV32-NEXT: lui a1, 209715 1340; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1341; RV32-NEXT: vnot.v v8, v8, v0.t 1342; RV32-NEXT: addi a1, a1, 819 1343; RV32-NEXT: vand.vv v8, v8, v9, v0.t 1344; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 1345; RV32-NEXT: vand.vv v9, v9, v10, v0.t 1346; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1347; RV32-NEXT: vmv.v.x v10, a1 1348; RV32-NEXT: lui a1, 61681 1349; RV32-NEXT: addi a1, a1, -241 1350; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1351; RV32-NEXT: vsub.vv v8, v8, v9, v0.t 1352; RV32-NEXT: vand.vv v9, v8, v10, v0.t 1353; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1354; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1355; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1356; RV32-NEXT: vmv.v.x v10, a1 1357; RV32-NEXT: lui a1, 4112 1358; RV32-NEXT: addi a1, a1, 257 1359; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1360; RV32-NEXT: vadd.vv v8, v9, v8, v0.t 1361; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t 1362; RV32-NEXT: vadd.vv v8, v8, v9, v0.t 1363; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1364; RV32-NEXT: vmv.v.x v9, a1 1365; RV32-NEXT: li a1, 56 1366; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1367; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1368; RV32-NEXT: vmul.vv v8, v8, v9, v0.t 1369; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1370; RV32-NEXT: ret 1371; 1372; RV64-LABEL: vp_cttz_nxv1i64: 1373; RV64: # %bb.0: 1374; RV64-NEXT: li a1, 1 1375; RV64-NEXT: lui a2, 349525 1376; RV64-NEXT: lui a3, 209715 1377; RV64-NEXT: lui a4, 61681 1378; RV64-NEXT: lui a5, 4112 1379; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1380; RV64-NEXT: vsub.vx v9, v8, a1, v0.t 1381; RV64-NEXT: addiw a0, a2, 1365 1382; RV64-NEXT: addiw a1, a3, 819 1383; RV64-NEXT: addiw a2, a4, -241 1384; RV64-NEXT: addiw a3, a5, 257 1385; RV64-NEXT: slli a4, a0, 32 1386; RV64-NEXT: add a0, a0, a4 1387; RV64-NEXT: slli a4, a1, 32 1388; RV64-NEXT: add a1, a1, a4 1389; RV64-NEXT: slli a4, a2, 32 1390; RV64-NEXT: add a2, a2, a4 1391; RV64-NEXT: slli a4, a3, 32 1392; RV64-NEXT: add a3, a3, a4 1393; RV64-NEXT: vnot.v v8, v8, v0.t 1394; RV64-NEXT: vand.vv v8, v8, v9, v0.t 1395; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 1396; RV64-NEXT: vand.vx v9, v9, a0, v0.t 1397; RV64-NEXT: vsub.vv v8, v8, v9, v0.t 1398; RV64-NEXT: vand.vx v9, v8, a1, v0.t 1399; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1400; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1401; RV64-NEXT: vadd.vv v8, v9, v8, v0.t 1402; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 1403; RV64-NEXT: vadd.vv v8, v8, v9, v0.t 1404; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1405; RV64-NEXT: li a0, 56 1406; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1407; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1408; RV64-NEXT: ret 1409; 1410; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64: 1411; CHECK-ZVBB: # %bb.0: 1412; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1413; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1414; CHECK-ZVBB-NEXT: ret 1415 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl) 1416 ret <vscale x 1 x i64> %v 1417} 1418 1419define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { 1420; RV32-LABEL: vp_cttz_nxv1i64_unmasked: 1421; RV32: # %bb.0: 1422; RV32-NEXT: li a1, 1 1423; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1424; RV32-NEXT: vnot.v v9, v8 1425; RV32-NEXT: vsub.vx v8, v8, a1 1426; RV32-NEXT: lui a1, 349525 1427; RV32-NEXT: addi a1, a1, 1365 1428; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1429; RV32-NEXT: vmv.v.x v10, a1 1430; RV32-NEXT: lui a1, 209715 1431; RV32-NEXT: addi a1, a1, 819 1432; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1433; RV32-NEXT: vand.vv v8, v9, v8 1434; RV32-NEXT: vsrl.vi v9, v8, 1 1435; RV32-NEXT: vand.vv v9, v9, v10 1436; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1437; RV32-NEXT: vmv.v.x v10, a1 1438; RV32-NEXT: lui a1, 61681 1439; RV32-NEXT: addi a1, a1, -241 1440; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1441; RV32-NEXT: vsub.vv v8, v8, v9 1442; RV32-NEXT: vand.vv v9, v8, v10 1443; RV32-NEXT: vsrl.vi v8, v8, 2 1444; RV32-NEXT: vand.vv v8, v8, v10 1445; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1446; RV32-NEXT: vmv.v.x v10, a1 1447; RV32-NEXT: lui a1, 4112 1448; RV32-NEXT: addi a1, a1, 257 1449; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1450; RV32-NEXT: vadd.vv v8, v9, v8 1451; RV32-NEXT: vsrl.vi v9, v8, 4 1452; RV32-NEXT: vadd.vv v8, v8, v9 1453; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1454; RV32-NEXT: vmv.v.x v9, a1 1455; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1456; RV32-NEXT: vand.vv v8, v8, v10 1457; RV32-NEXT: vmul.vv v8, v8, v9 1458; RV32-NEXT: li a0, 56 1459; RV32-NEXT: vsrl.vx v8, v8, a0 1460; RV32-NEXT: ret 1461; 1462; RV64-LABEL: vp_cttz_nxv1i64_unmasked: 1463; RV64: # %bb.0: 1464; RV64-NEXT: li a1, 1 1465; RV64-NEXT: lui a2, 349525 1466; RV64-NEXT: lui a3, 209715 1467; RV64-NEXT: lui a4, 61681 1468; RV64-NEXT: lui a5, 4112 1469; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1470; RV64-NEXT: vsub.vx v9, v8, a1 1471; RV64-NEXT: addiw a0, a2, 1365 1472; RV64-NEXT: addiw a1, a3, 819 1473; RV64-NEXT: addiw a2, a4, -241 1474; RV64-NEXT: addiw a3, a5, 257 1475; RV64-NEXT: slli a4, a0, 32 1476; RV64-NEXT: add a0, a0, a4 1477; RV64-NEXT: slli a4, a1, 32 1478; RV64-NEXT: add a1, a1, a4 1479; RV64-NEXT: slli a4, a2, 32 1480; RV64-NEXT: add a2, a2, a4 1481; RV64-NEXT: slli a4, a3, 32 1482; RV64-NEXT: add a3, a3, a4 1483; RV64-NEXT: vnot.v v8, v8 1484; RV64-NEXT: vand.vv v8, v8, v9 1485; RV64-NEXT: vsrl.vi v9, v8, 1 1486; RV64-NEXT: vand.vx v9, v9, a0 1487; RV64-NEXT: vsub.vv v8, v8, v9 1488; RV64-NEXT: vand.vx v9, v8, a1 1489; RV64-NEXT: vsrl.vi v8, v8, 2 1490; RV64-NEXT: vand.vx v8, v8, a1 1491; RV64-NEXT: vadd.vv v8, v9, v8 1492; RV64-NEXT: vsrl.vi v9, v8, 4 1493; RV64-NEXT: vadd.vv v8, v8, v9 1494; RV64-NEXT: vand.vx v8, v8, a2 1495; RV64-NEXT: vmul.vx v8, v8, a3 1496; RV64-NEXT: li a0, 56 1497; RV64-NEXT: vsrl.vx v8, v8, a0 1498; RV64-NEXT: ret 1499; 1500; CHECK-ZVBB-LABEL: vp_cttz_nxv1i64_unmasked: 1501; CHECK-ZVBB: # %bb.0: 1502; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1503; CHECK-ZVBB-NEXT: vctz.v v8, v8 1504; CHECK-ZVBB-NEXT: ret 1505 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1506 ret <vscale x 1 x i64> %v 1507} 1508 1509declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg, <vscale x 2 x i1>, i32) 1510 1511define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1512; RV32-LABEL: vp_cttz_nxv2i64: 1513; RV32: # %bb.0: 1514; RV32-NEXT: li a1, 1 1515; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1516; RV32-NEXT: vsub.vx v10, v8, a1, v0.t 1517; RV32-NEXT: lui a1, 349525 1518; RV32-NEXT: addi a1, a1, 1365 1519; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1520; RV32-NEXT: vmv.v.x v12, a1 1521; RV32-NEXT: lui a1, 209715 1522; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1523; RV32-NEXT: vnot.v v8, v8, v0.t 1524; RV32-NEXT: addi a1, a1, 819 1525; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1526; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 1527; RV32-NEXT: vand.vv v10, v10, v12, v0.t 1528; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1529; RV32-NEXT: vmv.v.x v12, a1 1530; RV32-NEXT: lui a1, 61681 1531; RV32-NEXT: addi a1, a1, -241 1532; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1533; RV32-NEXT: vsub.vv v8, v8, v10, v0.t 1534; RV32-NEXT: vand.vv v10, v8, v12, v0.t 1535; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1536; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1537; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1538; RV32-NEXT: vmv.v.x v12, a1 1539; RV32-NEXT: lui a1, 4112 1540; RV32-NEXT: addi a1, a1, 257 1541; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1542; RV32-NEXT: vadd.vv v8, v10, v8, v0.t 1543; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 1544; RV32-NEXT: vadd.vv v8, v8, v10, v0.t 1545; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1546; RV32-NEXT: vmv.v.x v10, a1 1547; RV32-NEXT: li a1, 56 1548; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1549; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1550; RV32-NEXT: vmul.vv v8, v8, v10, v0.t 1551; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1552; RV32-NEXT: ret 1553; 1554; RV64-LABEL: vp_cttz_nxv2i64: 1555; RV64: # %bb.0: 1556; RV64-NEXT: li a1, 1 1557; RV64-NEXT: lui a2, 349525 1558; RV64-NEXT: lui a3, 209715 1559; RV64-NEXT: lui a4, 61681 1560; RV64-NEXT: lui a5, 4112 1561; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1562; RV64-NEXT: vsub.vx v10, v8, a1, v0.t 1563; RV64-NEXT: addiw a0, a2, 1365 1564; RV64-NEXT: addiw a1, a3, 819 1565; RV64-NEXT: addiw a2, a4, -241 1566; RV64-NEXT: addiw a3, a5, 257 1567; RV64-NEXT: slli a4, a0, 32 1568; RV64-NEXT: add a0, a0, a4 1569; RV64-NEXT: slli a4, a1, 32 1570; RV64-NEXT: add a1, a1, a4 1571; RV64-NEXT: slli a4, a2, 32 1572; RV64-NEXT: add a2, a2, a4 1573; RV64-NEXT: slli a4, a3, 32 1574; RV64-NEXT: add a3, a3, a4 1575; RV64-NEXT: vnot.v v8, v8, v0.t 1576; RV64-NEXT: vand.vv v8, v8, v10, v0.t 1577; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 1578; RV64-NEXT: vand.vx v10, v10, a0, v0.t 1579; RV64-NEXT: vsub.vv v8, v8, v10, v0.t 1580; RV64-NEXT: vand.vx v10, v8, a1, v0.t 1581; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1582; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1583; RV64-NEXT: vadd.vv v8, v10, v8, v0.t 1584; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 1585; RV64-NEXT: vadd.vv v8, v8, v10, v0.t 1586; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1587; RV64-NEXT: li a0, 56 1588; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1589; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1590; RV64-NEXT: ret 1591; 1592; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64: 1593; CHECK-ZVBB: # %bb.0: 1594; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1595; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1596; CHECK-ZVBB-NEXT: ret 1597 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> %m, i32 %evl) 1598 ret <vscale x 2 x i64> %v 1599} 1600 1601define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { 1602; RV32-LABEL: vp_cttz_nxv2i64_unmasked: 1603; RV32: # %bb.0: 1604; RV32-NEXT: li a1, 1 1605; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1606; RV32-NEXT: vnot.v v10, v8 1607; RV32-NEXT: vsub.vx v8, v8, a1 1608; RV32-NEXT: lui a1, 349525 1609; RV32-NEXT: addi a1, a1, 1365 1610; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1611; RV32-NEXT: vmv.v.x v12, a1 1612; RV32-NEXT: lui a1, 209715 1613; RV32-NEXT: addi a1, a1, 819 1614; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1615; RV32-NEXT: vand.vv v8, v10, v8 1616; RV32-NEXT: vsrl.vi v10, v8, 1 1617; RV32-NEXT: vand.vv v10, v10, v12 1618; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1619; RV32-NEXT: vmv.v.x v12, a1 1620; RV32-NEXT: lui a1, 61681 1621; RV32-NEXT: addi a1, a1, -241 1622; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1623; RV32-NEXT: vsub.vv v8, v8, v10 1624; RV32-NEXT: vand.vv v10, v8, v12 1625; RV32-NEXT: vsrl.vi v8, v8, 2 1626; RV32-NEXT: vand.vv v8, v8, v12 1627; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1628; RV32-NEXT: vmv.v.x v12, a1 1629; RV32-NEXT: lui a1, 4112 1630; RV32-NEXT: addi a1, a1, 257 1631; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1632; RV32-NEXT: vadd.vv v8, v10, v8 1633; RV32-NEXT: vsrl.vi v10, v8, 4 1634; RV32-NEXT: vadd.vv v8, v8, v10 1635; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1636; RV32-NEXT: vmv.v.x v10, a1 1637; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1638; RV32-NEXT: vand.vv v8, v8, v12 1639; RV32-NEXT: vmul.vv v8, v8, v10 1640; RV32-NEXT: li a0, 56 1641; RV32-NEXT: vsrl.vx v8, v8, a0 1642; RV32-NEXT: ret 1643; 1644; RV64-LABEL: vp_cttz_nxv2i64_unmasked: 1645; RV64: # %bb.0: 1646; RV64-NEXT: li a1, 1 1647; RV64-NEXT: lui a2, 349525 1648; RV64-NEXT: lui a3, 209715 1649; RV64-NEXT: lui a4, 61681 1650; RV64-NEXT: lui a5, 4112 1651; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1652; RV64-NEXT: vsub.vx v10, v8, a1 1653; RV64-NEXT: addiw a0, a2, 1365 1654; RV64-NEXT: addiw a1, a3, 819 1655; RV64-NEXT: addiw a2, a4, -241 1656; RV64-NEXT: addiw a3, a5, 257 1657; RV64-NEXT: slli a4, a0, 32 1658; RV64-NEXT: add a0, a0, a4 1659; RV64-NEXT: slli a4, a1, 32 1660; RV64-NEXT: add a1, a1, a4 1661; RV64-NEXT: slli a4, a2, 32 1662; RV64-NEXT: add a2, a2, a4 1663; RV64-NEXT: slli a4, a3, 32 1664; RV64-NEXT: add a3, a3, a4 1665; RV64-NEXT: vnot.v v8, v8 1666; RV64-NEXT: vand.vv v8, v8, v10 1667; RV64-NEXT: vsrl.vi v10, v8, 1 1668; RV64-NEXT: vand.vx v10, v10, a0 1669; RV64-NEXT: vsub.vv v8, v8, v10 1670; RV64-NEXT: vand.vx v10, v8, a1 1671; RV64-NEXT: vsrl.vi v8, v8, 2 1672; RV64-NEXT: vand.vx v8, v8, a1 1673; RV64-NEXT: vadd.vv v8, v10, v8 1674; RV64-NEXT: vsrl.vi v10, v8, 4 1675; RV64-NEXT: vadd.vv v8, v8, v10 1676; RV64-NEXT: vand.vx v8, v8, a2 1677; RV64-NEXT: vmul.vx v8, v8, a3 1678; RV64-NEXT: li a0, 56 1679; RV64-NEXT: vsrl.vx v8, v8, a0 1680; RV64-NEXT: ret 1681; 1682; CHECK-ZVBB-LABEL: vp_cttz_nxv2i64_unmasked: 1683; CHECK-ZVBB: # %bb.0: 1684; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1685; CHECK-ZVBB-NEXT: vctz.v v8, v8 1686; CHECK-ZVBB-NEXT: ret 1687 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 false, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1688 ret <vscale x 2 x i64> %v 1689} 1690 1691declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg, <vscale x 4 x i1>, i32) 1692 1693define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1694; RV32-LABEL: vp_cttz_nxv4i64: 1695; RV32: # %bb.0: 1696; RV32-NEXT: li a1, 1 1697; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1698; RV32-NEXT: vsub.vx v12, v8, a1, v0.t 1699; RV32-NEXT: lui a1, 349525 1700; RV32-NEXT: addi a1, a1, 1365 1701; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1702; RV32-NEXT: vmv.v.x v16, a1 1703; RV32-NEXT: lui a1, 209715 1704; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1705; RV32-NEXT: vnot.v v8, v8, v0.t 1706; RV32-NEXT: addi a1, a1, 819 1707; RV32-NEXT: vand.vv v12, v8, v12, v0.t 1708; RV32-NEXT: vsrl.vi v8, v12, 1, v0.t 1709; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1710; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1711; RV32-NEXT: vmv.v.x v8, a1 1712; RV32-NEXT: lui a1, 61681 1713; RV32-NEXT: addi a1, a1, -241 1714; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1715; RV32-NEXT: vsub.vv v12, v12, v16, v0.t 1716; RV32-NEXT: vand.vv v16, v12, v8, v0.t 1717; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t 1718; RV32-NEXT: vand.vv v8, v12, v8, v0.t 1719; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1720; RV32-NEXT: vmv.v.x v12, a1 1721; RV32-NEXT: lui a1, 4112 1722; RV32-NEXT: addi a1, a1, 257 1723; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1724; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 1725; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 1726; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 1727; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1728; RV32-NEXT: vmv.v.x v16, a1 1729; RV32-NEXT: li a1, 56 1730; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1731; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1732; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1733; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1734; RV32-NEXT: ret 1735; 1736; RV64-LABEL: vp_cttz_nxv4i64: 1737; RV64: # %bb.0: 1738; RV64-NEXT: li a1, 1 1739; RV64-NEXT: lui a2, 349525 1740; RV64-NEXT: lui a3, 209715 1741; RV64-NEXT: lui a4, 61681 1742; RV64-NEXT: lui a5, 4112 1743; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1744; RV64-NEXT: vsub.vx v12, v8, a1, v0.t 1745; RV64-NEXT: addiw a0, a2, 1365 1746; RV64-NEXT: addiw a1, a3, 819 1747; RV64-NEXT: addiw a2, a4, -241 1748; RV64-NEXT: addiw a3, a5, 257 1749; RV64-NEXT: slli a4, a0, 32 1750; RV64-NEXT: add a0, a0, a4 1751; RV64-NEXT: slli a4, a1, 32 1752; RV64-NEXT: add a1, a1, a4 1753; RV64-NEXT: slli a4, a2, 32 1754; RV64-NEXT: add a2, a2, a4 1755; RV64-NEXT: slli a4, a3, 32 1756; RV64-NEXT: add a3, a3, a4 1757; RV64-NEXT: vnot.v v8, v8, v0.t 1758; RV64-NEXT: vand.vv v8, v8, v12, v0.t 1759; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 1760; RV64-NEXT: vand.vx v12, v12, a0, v0.t 1761; RV64-NEXT: vsub.vv v8, v8, v12, v0.t 1762; RV64-NEXT: vand.vx v12, v8, a1, v0.t 1763; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1764; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1765; RV64-NEXT: vadd.vv v8, v12, v8, v0.t 1766; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 1767; RV64-NEXT: vadd.vv v8, v8, v12, v0.t 1768; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1769; RV64-NEXT: li a0, 56 1770; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1771; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1772; RV64-NEXT: ret 1773; 1774; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64: 1775; CHECK-ZVBB: # %bb.0: 1776; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1777; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1778; CHECK-ZVBB-NEXT: ret 1779 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> %m, i32 %evl) 1780 ret <vscale x 4 x i64> %v 1781} 1782 1783define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { 1784; RV32-LABEL: vp_cttz_nxv4i64_unmasked: 1785; RV32: # %bb.0: 1786; RV32-NEXT: li a1, 1 1787; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1788; RV32-NEXT: vnot.v v12, v8 1789; RV32-NEXT: vsub.vx v8, v8, a1 1790; RV32-NEXT: lui a1, 349525 1791; RV32-NEXT: addi a1, a1, 1365 1792; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1793; RV32-NEXT: vmv.v.x v16, a1 1794; RV32-NEXT: lui a1, 209715 1795; RV32-NEXT: addi a1, a1, 819 1796; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1797; RV32-NEXT: vand.vv v8, v12, v8 1798; RV32-NEXT: vsrl.vi v12, v8, 1 1799; RV32-NEXT: vand.vv v12, v12, v16 1800; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1801; RV32-NEXT: vmv.v.x v16, a1 1802; RV32-NEXT: lui a1, 61681 1803; RV32-NEXT: addi a1, a1, -241 1804; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1805; RV32-NEXT: vsub.vv v8, v8, v12 1806; RV32-NEXT: vand.vv v12, v8, v16 1807; RV32-NEXT: vsrl.vi v8, v8, 2 1808; RV32-NEXT: vand.vv v8, v8, v16 1809; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1810; RV32-NEXT: vmv.v.x v16, a1 1811; RV32-NEXT: lui a1, 4112 1812; RV32-NEXT: addi a1, a1, 257 1813; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1814; RV32-NEXT: vadd.vv v8, v12, v8 1815; RV32-NEXT: vsrl.vi v12, v8, 4 1816; RV32-NEXT: vadd.vv v8, v8, v12 1817; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1818; RV32-NEXT: vmv.v.x v12, a1 1819; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1820; RV32-NEXT: vand.vv v8, v8, v16 1821; RV32-NEXT: vmul.vv v8, v8, v12 1822; RV32-NEXT: li a0, 56 1823; RV32-NEXT: vsrl.vx v8, v8, a0 1824; RV32-NEXT: ret 1825; 1826; RV64-LABEL: vp_cttz_nxv4i64_unmasked: 1827; RV64: # %bb.0: 1828; RV64-NEXT: li a1, 1 1829; RV64-NEXT: lui a2, 349525 1830; RV64-NEXT: lui a3, 209715 1831; RV64-NEXT: lui a4, 61681 1832; RV64-NEXT: lui a5, 4112 1833; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1834; RV64-NEXT: vsub.vx v12, v8, a1 1835; RV64-NEXT: addiw a0, a2, 1365 1836; RV64-NEXT: addiw a1, a3, 819 1837; RV64-NEXT: addiw a2, a4, -241 1838; RV64-NEXT: addiw a3, a5, 257 1839; RV64-NEXT: slli a4, a0, 32 1840; RV64-NEXT: add a0, a0, a4 1841; RV64-NEXT: slli a4, a1, 32 1842; RV64-NEXT: add a1, a1, a4 1843; RV64-NEXT: slli a4, a2, 32 1844; RV64-NEXT: add a2, a2, a4 1845; RV64-NEXT: slli a4, a3, 32 1846; RV64-NEXT: add a3, a3, a4 1847; RV64-NEXT: vnot.v v8, v8 1848; RV64-NEXT: vand.vv v8, v8, v12 1849; RV64-NEXT: vsrl.vi v12, v8, 1 1850; RV64-NEXT: vand.vx v12, v12, a0 1851; RV64-NEXT: vsub.vv v8, v8, v12 1852; RV64-NEXT: vand.vx v12, v8, a1 1853; RV64-NEXT: vsrl.vi v8, v8, 2 1854; RV64-NEXT: vand.vx v8, v8, a1 1855; RV64-NEXT: vadd.vv v8, v12, v8 1856; RV64-NEXT: vsrl.vi v12, v8, 4 1857; RV64-NEXT: vadd.vv v8, v8, v12 1858; RV64-NEXT: vand.vx v8, v8, a2 1859; RV64-NEXT: vmul.vx v8, v8, a3 1860; RV64-NEXT: li a0, 56 1861; RV64-NEXT: vsrl.vx v8, v8, a0 1862; RV64-NEXT: ret 1863; 1864; CHECK-ZVBB-LABEL: vp_cttz_nxv4i64_unmasked: 1865; CHECK-ZVBB: # %bb.0: 1866; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1867; CHECK-ZVBB-NEXT: vctz.v v8, v8 1868; CHECK-ZVBB-NEXT: ret 1869 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 false, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1870 ret <vscale x 4 x i64> %v 1871} 1872 1873declare <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64>, i1 immarg, <vscale x 7 x i1>, i32) 1874 1875define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 1876; RV32-LABEL: vp_cttz_nxv7i64: 1877; RV32: # %bb.0: 1878; RV32-NEXT: li a1, 1 1879; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1880; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 1881; RV32-NEXT: lui a1, 349525 1882; RV32-NEXT: addi a1, a1, 1365 1883; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1884; RV32-NEXT: vmv.v.x v24, a1 1885; RV32-NEXT: lui a1, 209715 1886; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1887; RV32-NEXT: vnot.v v8, v8, v0.t 1888; RV32-NEXT: addi a1, a1, 819 1889; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1890; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 1891; RV32-NEXT: vand.vv v24, v8, v24, v0.t 1892; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1893; RV32-NEXT: vmv.v.x v8, a1 1894; RV32-NEXT: lui a1, 61681 1895; RV32-NEXT: addi a1, a1, -241 1896; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1897; RV32-NEXT: vsub.vv v24, v16, v24, v0.t 1898; RV32-NEXT: vand.vv v16, v24, v8, v0.t 1899; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t 1900; RV32-NEXT: vand.vv v24, v24, v8, v0.t 1901; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1902; RV32-NEXT: vmv.v.x v8, a1 1903; RV32-NEXT: lui a1, 4112 1904; RV32-NEXT: addi a1, a1, 257 1905; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1906; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 1907; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t 1908; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 1909; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1910; RV32-NEXT: vmv.v.x v24, a1 1911; RV32-NEXT: li a1, 56 1912; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1913; RV32-NEXT: vand.vv v8, v16, v8, v0.t 1914; RV32-NEXT: vmul.vv v8, v8, v24, v0.t 1915; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1916; RV32-NEXT: ret 1917; 1918; RV64-LABEL: vp_cttz_nxv7i64: 1919; RV64: # %bb.0: 1920; RV64-NEXT: li a1, 1 1921; RV64-NEXT: lui a2, 349525 1922; RV64-NEXT: lui a3, 209715 1923; RV64-NEXT: lui a4, 61681 1924; RV64-NEXT: lui a5, 4112 1925; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1926; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 1927; RV64-NEXT: addiw a0, a2, 1365 1928; RV64-NEXT: addiw a1, a3, 819 1929; RV64-NEXT: addiw a2, a4, -241 1930; RV64-NEXT: addiw a3, a5, 257 1931; RV64-NEXT: slli a4, a0, 32 1932; RV64-NEXT: add a0, a0, a4 1933; RV64-NEXT: slli a4, a1, 32 1934; RV64-NEXT: add a1, a1, a4 1935; RV64-NEXT: slli a4, a2, 32 1936; RV64-NEXT: add a2, a2, a4 1937; RV64-NEXT: slli a4, a3, 32 1938; RV64-NEXT: add a3, a3, a4 1939; RV64-NEXT: vnot.v v8, v8, v0.t 1940; RV64-NEXT: vand.vv v8, v8, v16, v0.t 1941; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1942; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1943; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1944; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1945; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1946; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1947; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1948; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1949; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1950; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1951; RV64-NEXT: li a0, 56 1952; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1953; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1954; RV64-NEXT: ret 1955; 1956; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64: 1957; CHECK-ZVBB: # %bb.0: 1958; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1959; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 1960; CHECK-ZVBB-NEXT: ret 1961 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> %m, i32 %evl) 1962 ret <vscale x 7 x i64> %v 1963} 1964 1965define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { 1966; RV32-LABEL: vp_cttz_nxv7i64_unmasked: 1967; RV32: # %bb.0: 1968; RV32-NEXT: li a1, 1 1969; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1970; RV32-NEXT: vnot.v v16, v8 1971; RV32-NEXT: vsub.vx v8, v8, a1 1972; RV32-NEXT: lui a1, 349525 1973; RV32-NEXT: addi a1, a1, 1365 1974; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1975; RV32-NEXT: vmv.v.x v24, a1 1976; RV32-NEXT: lui a1, 209715 1977; RV32-NEXT: addi a1, a1, 819 1978; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1979; RV32-NEXT: vand.vv v8, v16, v8 1980; RV32-NEXT: vsrl.vi v16, v8, 1 1981; RV32-NEXT: vand.vv v24, v16, v24 1982; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1983; RV32-NEXT: vmv.v.x v16, a1 1984; RV32-NEXT: lui a1, 61681 1985; RV32-NEXT: addi a1, a1, -241 1986; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1987; RV32-NEXT: vsub.vv v8, v8, v24 1988; RV32-NEXT: vand.vv v24, v8, v16 1989; RV32-NEXT: vsrl.vi v8, v8, 2 1990; RV32-NEXT: vand.vv v8, v8, v16 1991; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1992; RV32-NEXT: vmv.v.x v16, a1 1993; RV32-NEXT: lui a1, 4112 1994; RV32-NEXT: addi a1, a1, 257 1995; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1996; RV32-NEXT: vadd.vv v8, v24, v8 1997; RV32-NEXT: vsrl.vi v24, v8, 4 1998; RV32-NEXT: vadd.vv v8, v8, v24 1999; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2000; RV32-NEXT: vmv.v.x v24, a1 2001; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2002; RV32-NEXT: vand.vv v8, v8, v16 2003; RV32-NEXT: vmul.vv v8, v8, v24 2004; RV32-NEXT: li a0, 56 2005; RV32-NEXT: vsrl.vx v8, v8, a0 2006; RV32-NEXT: ret 2007; 2008; RV64-LABEL: vp_cttz_nxv7i64_unmasked: 2009; RV64: # %bb.0: 2010; RV64-NEXT: li a1, 1 2011; RV64-NEXT: lui a2, 349525 2012; RV64-NEXT: lui a3, 209715 2013; RV64-NEXT: lui a4, 61681 2014; RV64-NEXT: lui a5, 4112 2015; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2016; RV64-NEXT: vsub.vx v16, v8, a1 2017; RV64-NEXT: addiw a0, a2, 1365 2018; RV64-NEXT: addiw a1, a3, 819 2019; RV64-NEXT: addiw a2, a4, -241 2020; RV64-NEXT: addiw a3, a5, 257 2021; RV64-NEXT: slli a4, a0, 32 2022; RV64-NEXT: add a0, a0, a4 2023; RV64-NEXT: slli a4, a1, 32 2024; RV64-NEXT: add a1, a1, a4 2025; RV64-NEXT: slli a4, a2, 32 2026; RV64-NEXT: add a2, a2, a4 2027; RV64-NEXT: slli a4, a3, 32 2028; RV64-NEXT: add a3, a3, a4 2029; RV64-NEXT: vnot.v v8, v8 2030; RV64-NEXT: vand.vv v8, v8, v16 2031; RV64-NEXT: vsrl.vi v16, v8, 1 2032; RV64-NEXT: vand.vx v16, v16, a0 2033; RV64-NEXT: vsub.vv v8, v8, v16 2034; RV64-NEXT: vand.vx v16, v8, a1 2035; RV64-NEXT: vsrl.vi v8, v8, 2 2036; RV64-NEXT: vand.vx v8, v8, a1 2037; RV64-NEXT: vadd.vv v8, v16, v8 2038; RV64-NEXT: vsrl.vi v16, v8, 4 2039; RV64-NEXT: vadd.vv v8, v8, v16 2040; RV64-NEXT: vand.vx v8, v8, a2 2041; RV64-NEXT: vmul.vx v8, v8, a3 2042; RV64-NEXT: li a0, 56 2043; RV64-NEXT: vsrl.vx v8, v8, a0 2044; RV64-NEXT: ret 2045; 2046; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64_unmasked: 2047; CHECK-ZVBB: # %bb.0: 2048; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2049; CHECK-ZVBB-NEXT: vctz.v v8, v8 2050; CHECK-ZVBB-NEXT: ret 2051 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 false, <vscale x 7 x i1> splat (i1 true), i32 %evl) 2052 ret <vscale x 7 x i64> %v 2053} 2054 2055declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg, <vscale x 8 x i1>, i32) 2056 2057define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2058; RV32-LABEL: vp_cttz_nxv8i64: 2059; RV32: # %bb.0: 2060; RV32-NEXT: li a1, 1 2061; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2062; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 2063; RV32-NEXT: lui a1, 349525 2064; RV32-NEXT: addi a1, a1, 1365 2065; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2066; RV32-NEXT: vmv.v.x v24, a1 2067; RV32-NEXT: lui a1, 209715 2068; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2069; RV32-NEXT: vnot.v v8, v8, v0.t 2070; RV32-NEXT: addi a1, a1, 819 2071; RV32-NEXT: vand.vv v16, v8, v16, v0.t 2072; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 2073; RV32-NEXT: vand.vv v24, v8, v24, v0.t 2074; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2075; RV32-NEXT: vmv.v.x v8, a1 2076; RV32-NEXT: lui a1, 61681 2077; RV32-NEXT: addi a1, a1, -241 2078; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2079; RV32-NEXT: vsub.vv v24, v16, v24, v0.t 2080; RV32-NEXT: vand.vv v16, v24, v8, v0.t 2081; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t 2082; RV32-NEXT: vand.vv v24, v24, v8, v0.t 2083; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2084; RV32-NEXT: vmv.v.x v8, a1 2085; RV32-NEXT: lui a1, 4112 2086; RV32-NEXT: addi a1, a1, 257 2087; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2088; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 2089; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t 2090; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 2091; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2092; RV32-NEXT: vmv.v.x v24, a1 2093; RV32-NEXT: li a1, 56 2094; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2095; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2096; RV32-NEXT: vmul.vv v8, v8, v24, v0.t 2097; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 2098; RV32-NEXT: ret 2099; 2100; RV64-LABEL: vp_cttz_nxv8i64: 2101; RV64: # %bb.0: 2102; RV64-NEXT: li a1, 1 2103; RV64-NEXT: lui a2, 349525 2104; RV64-NEXT: lui a3, 209715 2105; RV64-NEXT: lui a4, 61681 2106; RV64-NEXT: lui a5, 4112 2107; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2108; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 2109; RV64-NEXT: addiw a0, a2, 1365 2110; RV64-NEXT: addiw a1, a3, 819 2111; RV64-NEXT: addiw a2, a4, -241 2112; RV64-NEXT: addiw a3, a5, 257 2113; RV64-NEXT: slli a4, a0, 32 2114; RV64-NEXT: add a0, a0, a4 2115; RV64-NEXT: slli a4, a1, 32 2116; RV64-NEXT: add a1, a1, a4 2117; RV64-NEXT: slli a4, a2, 32 2118; RV64-NEXT: add a2, a2, a4 2119; RV64-NEXT: slli a4, a3, 32 2120; RV64-NEXT: add a3, a3, a4 2121; RV64-NEXT: vnot.v v8, v8, v0.t 2122; RV64-NEXT: vand.vv v8, v8, v16, v0.t 2123; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2124; RV64-NEXT: vand.vx v16, v16, a0, v0.t 2125; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 2126; RV64-NEXT: vand.vx v16, v8, a1, v0.t 2127; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2128; RV64-NEXT: vand.vx v8, v8, a1, v0.t 2129; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2130; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2131; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2132; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2133; RV64-NEXT: li a0, 56 2134; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 2135; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 2136; RV64-NEXT: ret 2137; 2138; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64: 2139; CHECK-ZVBB: # %bb.0: 2140; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2141; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2142; CHECK-ZVBB-NEXT: ret 2143 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> %m, i32 %evl) 2144 ret <vscale x 8 x i64> %v 2145} 2146 2147define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { 2148; RV32-LABEL: vp_cttz_nxv8i64_unmasked: 2149; RV32: # %bb.0: 2150; RV32-NEXT: li a1, 1 2151; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2152; RV32-NEXT: vnot.v v16, v8 2153; RV32-NEXT: vsub.vx v8, v8, a1 2154; RV32-NEXT: lui a1, 349525 2155; RV32-NEXT: addi a1, a1, 1365 2156; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2157; RV32-NEXT: vmv.v.x v24, a1 2158; RV32-NEXT: lui a1, 209715 2159; RV32-NEXT: addi a1, a1, 819 2160; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2161; RV32-NEXT: vand.vv v8, v16, v8 2162; RV32-NEXT: vsrl.vi v16, v8, 1 2163; RV32-NEXT: vand.vv v24, v16, v24 2164; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2165; RV32-NEXT: vmv.v.x v16, a1 2166; RV32-NEXT: lui a1, 61681 2167; RV32-NEXT: addi a1, a1, -241 2168; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2169; RV32-NEXT: vsub.vv v8, v8, v24 2170; RV32-NEXT: vand.vv v24, v8, v16 2171; RV32-NEXT: vsrl.vi v8, v8, 2 2172; RV32-NEXT: vand.vv v8, v8, v16 2173; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2174; RV32-NEXT: vmv.v.x v16, a1 2175; RV32-NEXT: lui a1, 4112 2176; RV32-NEXT: addi a1, a1, 257 2177; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2178; RV32-NEXT: vadd.vv v8, v24, v8 2179; RV32-NEXT: vsrl.vi v24, v8, 4 2180; RV32-NEXT: vadd.vv v8, v8, v24 2181; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 2182; RV32-NEXT: vmv.v.x v24, a1 2183; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2184; RV32-NEXT: vand.vv v8, v8, v16 2185; RV32-NEXT: vmul.vv v8, v8, v24 2186; RV32-NEXT: li a0, 56 2187; RV32-NEXT: vsrl.vx v8, v8, a0 2188; RV32-NEXT: ret 2189; 2190; RV64-LABEL: vp_cttz_nxv8i64_unmasked: 2191; RV64: # %bb.0: 2192; RV64-NEXT: li a1, 1 2193; RV64-NEXT: lui a2, 349525 2194; RV64-NEXT: lui a3, 209715 2195; RV64-NEXT: lui a4, 61681 2196; RV64-NEXT: lui a5, 4112 2197; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2198; RV64-NEXT: vsub.vx v16, v8, a1 2199; RV64-NEXT: addiw a0, a2, 1365 2200; RV64-NEXT: addiw a1, a3, 819 2201; RV64-NEXT: addiw a2, a4, -241 2202; RV64-NEXT: addiw a3, a5, 257 2203; RV64-NEXT: slli a4, a0, 32 2204; RV64-NEXT: add a0, a0, a4 2205; RV64-NEXT: slli a4, a1, 32 2206; RV64-NEXT: add a1, a1, a4 2207; RV64-NEXT: slli a4, a2, 32 2208; RV64-NEXT: add a2, a2, a4 2209; RV64-NEXT: slli a4, a3, 32 2210; RV64-NEXT: add a3, a3, a4 2211; RV64-NEXT: vnot.v v8, v8 2212; RV64-NEXT: vand.vv v8, v8, v16 2213; RV64-NEXT: vsrl.vi v16, v8, 1 2214; RV64-NEXT: vand.vx v16, v16, a0 2215; RV64-NEXT: vsub.vv v8, v8, v16 2216; RV64-NEXT: vand.vx v16, v8, a1 2217; RV64-NEXT: vsrl.vi v8, v8, 2 2218; RV64-NEXT: vand.vx v8, v8, a1 2219; RV64-NEXT: vadd.vv v8, v16, v8 2220; RV64-NEXT: vsrl.vi v16, v8, 4 2221; RV64-NEXT: vadd.vv v8, v8, v16 2222; RV64-NEXT: vand.vx v8, v8, a2 2223; RV64-NEXT: vmul.vx v8, v8, a3 2224; RV64-NEXT: li a0, 56 2225; RV64-NEXT: vsrl.vx v8, v8, a0 2226; RV64-NEXT: ret 2227; 2228; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64_unmasked: 2229; CHECK-ZVBB: # %bb.0: 2230; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2231; CHECK-ZVBB-NEXT: vctz.v v8, v8 2232; CHECK-ZVBB-NEXT: ret 2233 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 false, <vscale x 8 x i1> splat (i1 true), i32 %evl) 2234 ret <vscale x 8 x i64> %v 2235} 2236 2237declare <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64>, i1 immarg, <vscale x 16 x i1>, i32) 2238 2239define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2240; RV32-LABEL: vp_cttz_nxv16i64: 2241; RV32: # %bb.0: 2242; RV32-NEXT: addi sp, sp, -16 2243; RV32-NEXT: .cfi_def_cfa_offset 16 2244; RV32-NEXT: csrr a1, vlenb 2245; RV32-NEXT: li a2, 56 2246; RV32-NEXT: mul a1, a1, a2 2247; RV32-NEXT: sub sp, sp, a1 2248; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb 2249; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2250; RV32-NEXT: vmv1r.v v24, v0 2251; RV32-NEXT: csrr a1, vlenb 2252; RV32-NEXT: slli a1, a1, 5 2253; RV32-NEXT: add a1, sp, a1 2254; RV32-NEXT: addi a1, a1, 16 2255; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 2256; RV32-NEXT: csrr a1, vlenb 2257; RV32-NEXT: li a2, 1 2258; RV32-NEXT: srli a3, a1, 3 2259; RV32-NEXT: sub a4, a0, a1 2260; RV32-NEXT: vslidedown.vx v0, v0, a3 2261; RV32-NEXT: sltu a3, a0, a4 2262; RV32-NEXT: addi a3, a3, -1 2263; RV32-NEXT: and a3, a3, a4 2264; RV32-NEXT: lui a4, 349525 2265; RV32-NEXT: addi a4, a4, 1365 2266; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2267; RV32-NEXT: vsub.vx v8, v16, a2, v0.t 2268; RV32-NEXT: vnot.v v16, v16, v0.t 2269; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2270; RV32-NEXT: csrr a5, vlenb 2271; RV32-NEXT: li a6, 48 2272; RV32-NEXT: mul a5, a5, a6 2273; RV32-NEXT: add a5, sp, a5 2274; RV32-NEXT: addi a5, a5, 16 2275; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill 2276; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2277; RV32-NEXT: vmv.v.x v16, a4 2278; RV32-NEXT: csrr a4, vlenb 2279; RV32-NEXT: li a5, 40 2280; RV32-NEXT: mul a4, a4, a5 2281; RV32-NEXT: add a4, sp, a4 2282; RV32-NEXT: addi a4, a4, 16 2283; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2284; RV32-NEXT: csrr a4, vlenb 2285; RV32-NEXT: li a5, 48 2286; RV32-NEXT: mul a4, a4, a5 2287; RV32-NEXT: add a4, sp, a4 2288; RV32-NEXT: addi a4, a4, 16 2289; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2290; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2291; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t 2292; RV32-NEXT: csrr a4, vlenb 2293; RV32-NEXT: li a5, 24 2294; RV32-NEXT: mul a4, a4, a5 2295; RV32-NEXT: add a4, sp, a4 2296; RV32-NEXT: addi a4, a4, 16 2297; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 2298; RV32-NEXT: csrr a4, vlenb 2299; RV32-NEXT: li a5, 40 2300; RV32-NEXT: mul a4, a4, a5 2301; RV32-NEXT: add a4, sp, a4 2302; RV32-NEXT: addi a4, a4, 16 2303; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload 2304; RV32-NEXT: csrr a4, vlenb 2305; RV32-NEXT: li a5, 24 2306; RV32-NEXT: mul a4, a4, a5 2307; RV32-NEXT: add a4, sp, a4 2308; RV32-NEXT: addi a4, a4, 16 2309; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2310; RV32-NEXT: vand.vv v16, v8, v16, v0.t 2311; RV32-NEXT: csrr a4, vlenb 2312; RV32-NEXT: li a5, 48 2313; RV32-NEXT: mul a4, a4, a5 2314; RV32-NEXT: add a4, sp, a4 2315; RV32-NEXT: addi a4, a4, 16 2316; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2317; RV32-NEXT: vsub.vv v16, v8, v16, v0.t 2318; RV32-NEXT: lui a4, 209715 2319; RV32-NEXT: addi a4, a4, 819 2320; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2321; RV32-NEXT: vmv.v.x v8, a4 2322; RV32-NEXT: csrr a4, vlenb 2323; RV32-NEXT: li a5, 48 2324; RV32-NEXT: mul a4, a4, a5 2325; RV32-NEXT: add a4, sp, a4 2326; RV32-NEXT: addi a4, a4, 16 2327; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 2328; RV32-NEXT: csrr a4, vlenb 2329; RV32-NEXT: li a5, 48 2330; RV32-NEXT: mul a4, a4, a5 2331; RV32-NEXT: add a4, sp, a4 2332; RV32-NEXT: addi a4, a4, 16 2333; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2334; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2335; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2336; RV32-NEXT: csrr a4, vlenb 2337; RV32-NEXT: li a5, 24 2338; RV32-NEXT: mul a4, a4, a5 2339; RV32-NEXT: add a4, sp, a4 2340; RV32-NEXT: addi a4, a4, 16 2341; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 2342; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t 2343; RV32-NEXT: csrr a4, vlenb 2344; RV32-NEXT: li a5, 48 2345; RV32-NEXT: mul a4, a4, a5 2346; RV32-NEXT: add a4, sp, a4 2347; RV32-NEXT: addi a4, a4, 16 2348; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2349; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2350; RV32-NEXT: csrr a4, vlenb 2351; RV32-NEXT: li a5, 24 2352; RV32-NEXT: mul a4, a4, a5 2353; RV32-NEXT: add a4, sp, a4 2354; RV32-NEXT: addi a4, a4, 16 2355; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2356; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2357; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2358; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2359; RV32-NEXT: lui a4, 61681 2360; RV32-NEXT: addi a4, a4, -241 2361; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2362; RV32-NEXT: vmv.v.x v16, a4 2363; RV32-NEXT: csrr a4, vlenb 2364; RV32-NEXT: li a5, 24 2365; RV32-NEXT: mul a4, a4, a5 2366; RV32-NEXT: add a4, sp, a4 2367; RV32-NEXT: addi a4, a4, 16 2368; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2369; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2370; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2371; RV32-NEXT: lui a4, 4112 2372; RV32-NEXT: addi a4, a4, 257 2373; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2374; RV32-NEXT: vmv.v.x v16, a4 2375; RV32-NEXT: csrr a4, vlenb 2376; RV32-NEXT: slli a4, a4, 4 2377; RV32-NEXT: add a4, sp, a4 2378; RV32-NEXT: addi a4, a4, 16 2379; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2380; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2381; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 2382; RV32-NEXT: li a3, 56 2383; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t 2384; RV32-NEXT: csrr a4, vlenb 2385; RV32-NEXT: slli a4, a4, 3 2386; RV32-NEXT: add a4, sp, a4 2387; RV32-NEXT: addi a4, a4, 16 2388; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 2389; RV32-NEXT: bltu a0, a1, .LBB46_2 2390; RV32-NEXT: # %bb.1: 2391; RV32-NEXT: mv a0, a1 2392; RV32-NEXT: .LBB46_2: 2393; RV32-NEXT: vmv1r.v v0, v24 2394; RV32-NEXT: slli a1, a1, 5 2395; RV32-NEXT: add a1, sp, a1 2396; RV32-NEXT: addi a1, a1, 16 2397; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 2398; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2399; RV32-NEXT: vsub.vx v16, v8, a2, v0.t 2400; RV32-NEXT: vnot.v v8, v8, v0.t 2401; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2402; RV32-NEXT: addi a0, sp, 16 2403; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2404; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t 2405; RV32-NEXT: csrr a0, vlenb 2406; RV32-NEXT: slli a0, a0, 5 2407; RV32-NEXT: add a0, sp, a0 2408; RV32-NEXT: addi a0, a0, 16 2409; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2410; RV32-NEXT: csrr a0, vlenb 2411; RV32-NEXT: li a1, 40 2412; RV32-NEXT: mul a0, a0, a1 2413; RV32-NEXT: add a0, sp, a0 2414; RV32-NEXT: addi a0, a0, 16 2415; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2416; RV32-NEXT: csrr a0, vlenb 2417; RV32-NEXT: slli a0, a0, 5 2418; RV32-NEXT: add a0, sp, a0 2419; RV32-NEXT: addi a0, a0, 16 2420; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2421; RV32-NEXT: vand.vv v16, v8, v16, v0.t 2422; RV32-NEXT: addi a0, sp, 16 2423; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2424; RV32-NEXT: vsub.vv v8, v8, v16, v0.t 2425; RV32-NEXT: csrr a0, vlenb 2426; RV32-NEXT: li a1, 40 2427; RV32-NEXT: mul a0, a0, a1 2428; RV32-NEXT: add a0, sp, a0 2429; RV32-NEXT: addi a0, a0, 16 2430; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2431; RV32-NEXT: csrr a0, vlenb 2432; RV32-NEXT: li a1, 48 2433; RV32-NEXT: mul a0, a0, a1 2434; RV32-NEXT: add a0, sp, a0 2435; RV32-NEXT: addi a0, a0, 16 2436; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2437; RV32-NEXT: csrr a0, vlenb 2438; RV32-NEXT: li a1, 40 2439; RV32-NEXT: mul a0, a0, a1 2440; RV32-NEXT: add a0, sp, a0 2441; RV32-NEXT: addi a0, a0, 16 2442; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2443; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2444; RV32-NEXT: csrr a0, vlenb 2445; RV32-NEXT: slli a0, a0, 5 2446; RV32-NEXT: add a0, sp, a0 2447; RV32-NEXT: addi a0, a0, 16 2448; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 2449; RV32-NEXT: vmv8r.v v16, v8 2450; RV32-NEXT: csrr a0, vlenb 2451; RV32-NEXT: li a1, 40 2452; RV32-NEXT: mul a0, a0, a1 2453; RV32-NEXT: add a0, sp, a0 2454; RV32-NEXT: addi a0, a0, 16 2455; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2456; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 2457; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2458; RV32-NEXT: csrr a0, vlenb 2459; RV32-NEXT: slli a0, a0, 5 2460; RV32-NEXT: add a0, sp, a0 2461; RV32-NEXT: addi a0, a0, 16 2462; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2463; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 2464; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2465; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2466; RV32-NEXT: csrr a0, vlenb 2467; RV32-NEXT: li a1, 24 2468; RV32-NEXT: mul a0, a0, a1 2469; RV32-NEXT: add a0, sp, a0 2470; RV32-NEXT: addi a0, a0, 16 2471; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2472; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2473; RV32-NEXT: csrr a0, vlenb 2474; RV32-NEXT: slli a0, a0, 4 2475; RV32-NEXT: add a0, sp, a0 2476; RV32-NEXT: addi a0, a0, 16 2477; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2478; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 2479; RV32-NEXT: vsrl.vx v8, v8, a3, v0.t 2480; RV32-NEXT: csrr a0, vlenb 2481; RV32-NEXT: slli a0, a0, 3 2482; RV32-NEXT: add a0, sp, a0 2483; RV32-NEXT: addi a0, a0, 16 2484; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2485; RV32-NEXT: csrr a0, vlenb 2486; RV32-NEXT: li a1, 56 2487; RV32-NEXT: mul a0, a0, a1 2488; RV32-NEXT: add sp, sp, a0 2489; RV32-NEXT: .cfi_def_cfa sp, 16 2490; RV32-NEXT: addi sp, sp, 16 2491; RV32-NEXT: .cfi_def_cfa_offset 0 2492; RV32-NEXT: ret 2493; 2494; RV64-LABEL: vp_cttz_nxv16i64: 2495; RV64: # %bb.0: 2496; RV64-NEXT: addi sp, sp, -16 2497; RV64-NEXT: .cfi_def_cfa_offset 16 2498; RV64-NEXT: csrr a1, vlenb 2499; RV64-NEXT: slli a1, a1, 4 2500; RV64-NEXT: sub sp, sp, a1 2501; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2502; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2503; RV64-NEXT: vmv1r.v v24, v0 2504; RV64-NEXT: csrr a1, vlenb 2505; RV64-NEXT: slli a1, a1, 3 2506; RV64-NEXT: add a1, sp, a1 2507; RV64-NEXT: addi a1, a1, 16 2508; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 2509; RV64-NEXT: csrr a1, vlenb 2510; RV64-NEXT: li a2, 1 2511; RV64-NEXT: lui a3, 349525 2512; RV64-NEXT: lui a4, 209715 2513; RV64-NEXT: lui a5, 61681 2514; RV64-NEXT: lui a6, 4112 2515; RV64-NEXT: srli a7, a1, 3 2516; RV64-NEXT: sub t0, a0, a1 2517; RV64-NEXT: addiw a3, a3, 1365 2518; RV64-NEXT: addiw a4, a4, 819 2519; RV64-NEXT: addiw a5, a5, -241 2520; RV64-NEXT: addiw t1, a6, 257 2521; RV64-NEXT: vslidedown.vx v0, v0, a7 2522; RV64-NEXT: slli a7, a3, 32 2523; RV64-NEXT: add a7, a3, a7 2524; RV64-NEXT: slli a6, a4, 32 2525; RV64-NEXT: add a6, a4, a6 2526; RV64-NEXT: slli a3, a5, 32 2527; RV64-NEXT: add a3, a5, a3 2528; RV64-NEXT: slli a4, t1, 32 2529; RV64-NEXT: add a4, t1, a4 2530; RV64-NEXT: sltu a5, a0, t0 2531; RV64-NEXT: addi a5, a5, -1 2532; RV64-NEXT: and t0, a5, t0 2533; RV64-NEXT: li a5, 56 2534; RV64-NEXT: vsetvli zero, t0, e64, m8, ta, ma 2535; RV64-NEXT: vsub.vx v8, v16, a2, v0.t 2536; RV64-NEXT: vnot.v v16, v16, v0.t 2537; RV64-NEXT: vand.vv v8, v16, v8, v0.t 2538; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2539; RV64-NEXT: vand.vx v16, v16, a7, v0.t 2540; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 2541; RV64-NEXT: vand.vx v16, v8, a6, v0.t 2542; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2543; RV64-NEXT: vand.vx v8, v8, a6, v0.t 2544; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2545; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2546; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2547; RV64-NEXT: vand.vx v8, v8, a3, v0.t 2548; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 2549; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t 2550; RV64-NEXT: addi t0, sp, 16 2551; RV64-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill 2552; RV64-NEXT: bltu a0, a1, .LBB46_2 2553; RV64-NEXT: # %bb.1: 2554; RV64-NEXT: mv a0, a1 2555; RV64-NEXT: .LBB46_2: 2556; RV64-NEXT: vmv1r.v v0, v24 2557; RV64-NEXT: slli a1, a1, 3 2558; RV64-NEXT: add a1, sp, a1 2559; RV64-NEXT: addi a1, a1, 16 2560; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 2561; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2562; RV64-NEXT: vsub.vx v16, v8, a2, v0.t 2563; RV64-NEXT: vnot.v v8, v8, v0.t 2564; RV64-NEXT: vand.vv v8, v8, v16, v0.t 2565; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2566; RV64-NEXT: vand.vx v16, v16, a7, v0.t 2567; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 2568; RV64-NEXT: vand.vx v16, v8, a6, v0.t 2569; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2570; RV64-NEXT: vand.vx v8, v8, a6, v0.t 2571; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2572; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2573; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2574; RV64-NEXT: vand.vx v8, v8, a3, v0.t 2575; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 2576; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t 2577; RV64-NEXT: addi a0, sp, 16 2578; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2579; RV64-NEXT: csrr a0, vlenb 2580; RV64-NEXT: slli a0, a0, 4 2581; RV64-NEXT: add sp, sp, a0 2582; RV64-NEXT: .cfi_def_cfa sp, 16 2583; RV64-NEXT: addi sp, sp, 16 2584; RV64-NEXT: .cfi_def_cfa_offset 0 2585; RV64-NEXT: ret 2586; 2587; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64: 2588; CHECK-ZVBB: # %bb.0: 2589; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2590; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 2591; CHECK-ZVBB-NEXT: csrr a1, vlenb 2592; CHECK-ZVBB-NEXT: srli a2, a1, 3 2593; CHECK-ZVBB-NEXT: sub a3, a0, a1 2594; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 2595; CHECK-ZVBB-NEXT: sltu a2, a0, a3 2596; CHECK-ZVBB-NEXT: addi a2, a2, -1 2597; CHECK-ZVBB-NEXT: and a2, a2, a3 2598; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2599; CHECK-ZVBB-NEXT: vctz.v v16, v16, v0.t 2600; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2 2601; CHECK-ZVBB-NEXT: # %bb.1: 2602; CHECK-ZVBB-NEXT: mv a0, a1 2603; CHECK-ZVBB-NEXT: .LBB46_2: 2604; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 2605; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2606; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2607; CHECK-ZVBB-NEXT: ret 2608 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl) 2609 ret <vscale x 16 x i64> %v 2610} 2611 2612define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { 2613; RV32-LABEL: vp_cttz_nxv16i64_unmasked: 2614; RV32: # %bb.0: 2615; RV32-NEXT: addi sp, sp, -16 2616; RV32-NEXT: .cfi_def_cfa_offset 16 2617; RV32-NEXT: csrr a1, vlenb 2618; RV32-NEXT: slli a1, a1, 5 2619; RV32-NEXT: sub sp, sp, a1 2620; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 2621; RV32-NEXT: csrr a1, vlenb 2622; RV32-NEXT: li a2, 1 2623; RV32-NEXT: lui a3, 349525 2624; RV32-NEXT: lui a4, 209715 2625; RV32-NEXT: sub a5, a0, a1 2626; RV32-NEXT: addi a3, a3, 1365 2627; RV32-NEXT: addi a4, a4, 819 2628; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma 2629; RV32-NEXT: vmv.v.x v0, a3 2630; RV32-NEXT: sltu a3, a0, a5 2631; RV32-NEXT: addi a3, a3, -1 2632; RV32-NEXT: and a3, a3, a5 2633; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2634; RV32-NEXT: vsub.vx v24, v16, a2 2635; RV32-NEXT: vnot.v v16, v16 2636; RV32-NEXT: vand.vv v16, v16, v24 2637; RV32-NEXT: vsrl.vi v24, v16, 1 2638; RV32-NEXT: csrr a5, vlenb 2639; RV32-NEXT: li a6, 24 2640; RV32-NEXT: mul a5, a5, a6 2641; RV32-NEXT: add a5, sp, a5 2642; RV32-NEXT: addi a5, a5, 16 2643; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill 2644; RV32-NEXT: vand.vv v24, v24, v0 2645; RV32-NEXT: vsub.vv v16, v16, v24 2646; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2647; RV32-NEXT: vmv.v.x v0, a4 2648; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2649; RV32-NEXT: vand.vv v24, v16, v0 2650; RV32-NEXT: vsrl.vi v16, v16, 2 2651; RV32-NEXT: csrr a4, vlenb 2652; RV32-NEXT: slli a4, a4, 4 2653; RV32-NEXT: add a4, sp, a4 2654; RV32-NEXT: addi a4, a4, 16 2655; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill 2656; RV32-NEXT: vand.vv v16, v16, v0 2657; RV32-NEXT: vadd.vv v16, v24, v16 2658; RV32-NEXT: vsrl.vi v24, v16, 4 2659; RV32-NEXT: vadd.vv v16, v16, v24 2660; RV32-NEXT: lui a4, 61681 2661; RV32-NEXT: lui a5, 4112 2662; RV32-NEXT: addi a4, a4, -241 2663; RV32-NEXT: addi a5, a5, 257 2664; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma 2665; RV32-NEXT: vmv.v.x v24, a4 2666; RV32-NEXT: csrr a4, vlenb 2667; RV32-NEXT: slli a4, a4, 3 2668; RV32-NEXT: add a4, sp, a4 2669; RV32-NEXT: addi a4, a4, 16 2670; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill 2671; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2672; RV32-NEXT: vand.vv v16, v16, v24 2673; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2674; RV32-NEXT: vmv.v.x v24, a5 2675; RV32-NEXT: addi a4, sp, 16 2676; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill 2677; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma 2678; RV32-NEXT: vmul.vv v16, v16, v24 2679; RV32-NEXT: li a3, 56 2680; RV32-NEXT: vsrl.vx v16, v16, a3 2681; RV32-NEXT: bltu a0, a1, .LBB47_2 2682; RV32-NEXT: # %bb.1: 2683; RV32-NEXT: mv a0, a1 2684; RV32-NEXT: .LBB47_2: 2685; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2686; RV32-NEXT: vsub.vx v24, v8, a2 2687; RV32-NEXT: vnot.v v8, v8 2688; RV32-NEXT: vand.vv v8, v8, v24 2689; RV32-NEXT: vsrl.vi v24, v8, 1 2690; RV32-NEXT: csrr a0, vlenb 2691; RV32-NEXT: li a1, 24 2692; RV32-NEXT: mul a0, a0, a1 2693; RV32-NEXT: add a0, sp, a0 2694; RV32-NEXT: addi a0, a0, 16 2695; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 2696; RV32-NEXT: vand.vv v24, v24, v0 2697; RV32-NEXT: vsub.vv v8, v8, v24 2698; RV32-NEXT: csrr a0, vlenb 2699; RV32-NEXT: slli a0, a0, 4 2700; RV32-NEXT: add a0, sp, a0 2701; RV32-NEXT: addi a0, a0, 16 2702; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 2703; RV32-NEXT: vand.vv v24, v8, v0 2704; RV32-NEXT: vsrl.vi v8, v8, 2 2705; RV32-NEXT: vand.vv v8, v8, v0 2706; RV32-NEXT: vadd.vv v8, v24, v8 2707; RV32-NEXT: vsrl.vi v24, v8, 4 2708; RV32-NEXT: vadd.vv v8, v8, v24 2709; RV32-NEXT: csrr a0, vlenb 2710; RV32-NEXT: slli a0, a0, 3 2711; RV32-NEXT: add a0, sp, a0 2712; RV32-NEXT: addi a0, a0, 16 2713; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2714; RV32-NEXT: vand.vv v8, v8, v24 2715; RV32-NEXT: addi a0, sp, 16 2716; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2717; RV32-NEXT: vmul.vv v8, v8, v24 2718; RV32-NEXT: vsrl.vx v8, v8, a3 2719; RV32-NEXT: csrr a0, vlenb 2720; RV32-NEXT: slli a0, a0, 5 2721; RV32-NEXT: add sp, sp, a0 2722; RV32-NEXT: .cfi_def_cfa sp, 16 2723; RV32-NEXT: addi sp, sp, 16 2724; RV32-NEXT: .cfi_def_cfa_offset 0 2725; RV32-NEXT: ret 2726; 2727; RV64-LABEL: vp_cttz_nxv16i64_unmasked: 2728; RV64: # %bb.0: 2729; RV64-NEXT: csrr a1, vlenb 2730; RV64-NEXT: li a2, 1 2731; RV64-NEXT: lui a3, 349525 2732; RV64-NEXT: lui a4, 209715 2733; RV64-NEXT: lui a5, 61681 2734; RV64-NEXT: lui a6, 4112 2735; RV64-NEXT: sub a7, a0, a1 2736; RV64-NEXT: addiw a3, a3, 1365 2737; RV64-NEXT: addiw a4, a4, 819 2738; RV64-NEXT: addiw t0, a5, -241 2739; RV64-NEXT: addiw t1, a6, 257 2740; RV64-NEXT: slli a6, a3, 32 2741; RV64-NEXT: add a6, a3, a6 2742; RV64-NEXT: slli a5, a4, 32 2743; RV64-NEXT: add a5, a4, a5 2744; RV64-NEXT: slli a3, t0, 32 2745; RV64-NEXT: add a3, t0, a3 2746; RV64-NEXT: slli a4, t1, 32 2747; RV64-NEXT: add a4, t1, a4 2748; RV64-NEXT: sltu t0, a0, a7 2749; RV64-NEXT: addi t0, t0, -1 2750; RV64-NEXT: and a7, t0, a7 2751; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma 2752; RV64-NEXT: vsub.vx v24, v16, a2 2753; RV64-NEXT: vnot.v v16, v16 2754; RV64-NEXT: vand.vv v16, v16, v24 2755; RV64-NEXT: vsrl.vi v24, v16, 1 2756; RV64-NEXT: vand.vx v24, v24, a6 2757; RV64-NEXT: vsub.vv v16, v16, v24 2758; RV64-NEXT: vand.vx v24, v16, a5 2759; RV64-NEXT: vsrl.vi v16, v16, 2 2760; RV64-NEXT: vand.vx v16, v16, a5 2761; RV64-NEXT: vadd.vv v16, v24, v16 2762; RV64-NEXT: vsrl.vi v24, v16, 4 2763; RV64-NEXT: vadd.vv v16, v16, v24 2764; RV64-NEXT: vand.vx v16, v16, a3 2765; RV64-NEXT: vmul.vx v16, v16, a4 2766; RV64-NEXT: li a7, 56 2767; RV64-NEXT: vsrl.vx v16, v16, a7 2768; RV64-NEXT: bltu a0, a1, .LBB47_2 2769; RV64-NEXT: # %bb.1: 2770; RV64-NEXT: mv a0, a1 2771; RV64-NEXT: .LBB47_2: 2772; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2773; RV64-NEXT: vsub.vx v24, v8, a2 2774; RV64-NEXT: vnot.v v8, v8 2775; RV64-NEXT: vand.vv v8, v8, v24 2776; RV64-NEXT: vsrl.vi v24, v8, 1 2777; RV64-NEXT: vand.vx v24, v24, a6 2778; RV64-NEXT: vsub.vv v8, v8, v24 2779; RV64-NEXT: vand.vx v24, v8, a5 2780; RV64-NEXT: vsrl.vi v8, v8, 2 2781; RV64-NEXT: vand.vx v8, v8, a5 2782; RV64-NEXT: vadd.vv v8, v24, v8 2783; RV64-NEXT: vsrl.vi v24, v8, 4 2784; RV64-NEXT: vadd.vv v8, v8, v24 2785; RV64-NEXT: vand.vx v8, v8, a3 2786; RV64-NEXT: vmul.vx v8, v8, a4 2787; RV64-NEXT: vsrl.vx v8, v8, a7 2788; RV64-NEXT: ret 2789; 2790; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked: 2791; CHECK-ZVBB: # %bb.0: 2792; CHECK-ZVBB-NEXT: csrr a1, vlenb 2793; CHECK-ZVBB-NEXT: sub a2, a0, a1 2794; CHECK-ZVBB-NEXT: sltu a3, a0, a2 2795; CHECK-ZVBB-NEXT: addi a3, a3, -1 2796; CHECK-ZVBB-NEXT: and a2, a3, a2 2797; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2798; CHECK-ZVBB-NEXT: vctz.v v16, v16 2799; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2 2800; CHECK-ZVBB-NEXT: # %bb.1: 2801; CHECK-ZVBB-NEXT: mv a0, a1 2802; CHECK-ZVBB-NEXT: .LBB47_2: 2803; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2804; CHECK-ZVBB-NEXT: vctz.v v8, v8 2805; CHECK-ZVBB-NEXT: ret 2806 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> splat (i1 true), i32 %evl) 2807 ret <vscale x 16 x i64> %v 2808} 2809 2810define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 2811; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8: 2812; CHECK: # %bb.0: 2813; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2814; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 2815; CHECK-NEXT: li a0, 127 2816; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2817; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 2818; CHECK-NEXT: vzext.vf2 v9, v8, v0.t 2819; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t 2820; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 2821; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t 2822; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 2823; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 2824; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma 2825; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 2826; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 2827; CHECK-NEXT: ret 2828; 2829; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8: 2830; CHECK-ZVBB: # %bb.0: 2831; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2832; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2833; CHECK-ZVBB-NEXT: ret 2834 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl) 2835 ret <vscale x 1 x i8> %v 2836} 2837 2838define <vscale x 1 x i8> @vp_cttz_zero_undef_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { 2839; CHECK-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked: 2840; CHECK: # %bb.0: 2841; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2842; CHECK-NEXT: vrsub.vi v9, v8, 0 2843; CHECK-NEXT: vand.vv v8, v8, v9 2844; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 2845; CHECK-NEXT: vzext.vf2 v9, v8 2846; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 2847; CHECK-NEXT: vnsrl.wi v8, v8, 23 2848; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma 2849; CHECK-NEXT: vnsrl.wi v8, v8, 0 2850; CHECK-NEXT: li a0, 127 2851; CHECK-NEXT: vsub.vx v8, v8, a0 2852; CHECK-NEXT: ret 2853; 2854; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i8_unmasked: 2855; CHECK-ZVBB: # %bb.0: 2856; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2857; CHECK-ZVBB-NEXT: vctz.v v8, v8 2858; CHECK-ZVBB-NEXT: ret 2859 %v = call <vscale x 1 x i8> @llvm.vp.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl) 2860 ret <vscale x 1 x i8> %v 2861} 2862 2863 2864define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 2865; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8: 2866; CHECK: # %bb.0: 2867; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2868; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 2869; CHECK-NEXT: li a0, 127 2870; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2871; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 2872; CHECK-NEXT: vzext.vf2 v9, v8, v0.t 2873; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t 2874; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 2875; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t 2876; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 2877; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 2878; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma 2879; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 2880; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 2881; CHECK-NEXT: ret 2882; 2883; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8: 2884; CHECK-ZVBB: # %bb.0: 2885; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2886; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2887; CHECK-ZVBB-NEXT: ret 2888 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl) 2889 ret <vscale x 2 x i8> %v 2890} 2891 2892define <vscale x 2 x i8> @vp_cttz_zero_undef_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { 2893; CHECK-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked: 2894; CHECK: # %bb.0: 2895; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2896; CHECK-NEXT: vrsub.vi v9, v8, 0 2897; CHECK-NEXT: vand.vv v8, v8, v9 2898; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 2899; CHECK-NEXT: vzext.vf2 v9, v8 2900; CHECK-NEXT: vfwcvt.f.xu.v v8, v9 2901; CHECK-NEXT: vnsrl.wi v8, v8, 23 2902; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma 2903; CHECK-NEXT: vnsrl.wi v8, v8, 0 2904; CHECK-NEXT: li a0, 127 2905; CHECK-NEXT: vsub.vx v8, v8, a0 2906; CHECK-NEXT: ret 2907; 2908; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i8_unmasked: 2909; CHECK-ZVBB: # %bb.0: 2910; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2911; CHECK-ZVBB-NEXT: vctz.v v8, v8 2912; CHECK-ZVBB-NEXT: ret 2913 %v = call <vscale x 2 x i8> @llvm.vp.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl) 2914 ret <vscale x 2 x i8> %v 2915} 2916 2917 2918define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 2919; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8: 2920; CHECK: # %bb.0: 2921; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2922; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 2923; CHECK-NEXT: li a0, 127 2924; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2925; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2926; CHECK-NEXT: vzext.vf2 v9, v8, v0.t 2927; CHECK-NEXT: vfwcvt.f.xu.v v10, v9, v0.t 2928; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2929; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t 2930; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2931; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t 2932; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 2933; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t 2934; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 2935; CHECK-NEXT: ret 2936; 2937; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8: 2938; CHECK-ZVBB: # %bb.0: 2939; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2940; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2941; CHECK-ZVBB-NEXT: ret 2942 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl) 2943 ret <vscale x 4 x i8> %v 2944} 2945 2946define <vscale x 4 x i8> @vp_cttz_zero_undef_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { 2947; CHECK-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked: 2948; CHECK: # %bb.0: 2949; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2950; CHECK-NEXT: vrsub.vi v9, v8, 0 2951; CHECK-NEXT: vand.vv v8, v8, v9 2952; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2953; CHECK-NEXT: vzext.vf2 v9, v8 2954; CHECK-NEXT: vfwcvt.f.xu.v v10, v9 2955; CHECK-NEXT: vnsrl.wi v8, v10, 23 2956; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma 2957; CHECK-NEXT: vnsrl.wi v8, v8, 0 2958; CHECK-NEXT: li a0, 127 2959; CHECK-NEXT: vsub.vx v8, v8, a0 2960; CHECK-NEXT: ret 2961; 2962; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i8_unmasked: 2963; CHECK-ZVBB: # %bb.0: 2964; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2965; CHECK-ZVBB-NEXT: vctz.v v8, v8 2966; CHECK-ZVBB-NEXT: ret 2967 %v = call <vscale x 4 x i8> @llvm.vp.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl) 2968 ret <vscale x 4 x i8> %v 2969} 2970 2971 2972define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 2973; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8: 2974; CHECK: # %bb.0: 2975; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 2976; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 2977; CHECK-NEXT: li a0, 127 2978; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2979; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2980; CHECK-NEXT: vzext.vf2 v10, v8, v0.t 2981; CHECK-NEXT: vfwcvt.f.xu.v v12, v10, v0.t 2982; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2983; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t 2984; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2985; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t 2986; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma 2987; CHECK-NEXT: vnsrl.wi v8, v12, 0, v0.t 2988; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 2989; CHECK-NEXT: ret 2990; 2991; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8: 2992; CHECK-ZVBB: # %bb.0: 2993; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 2994; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 2995; CHECK-ZVBB-NEXT: ret 2996 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl) 2997 ret <vscale x 8 x i8> %v 2998} 2999 3000define <vscale x 8 x i8> @vp_cttz_zero_undef_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { 3001; CHECK-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked: 3002; CHECK: # %bb.0: 3003; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 3004; CHECK-NEXT: vrsub.vi v9, v8, 0 3005; CHECK-NEXT: vand.vv v8, v8, v9 3006; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 3007; CHECK-NEXT: vzext.vf2 v10, v8 3008; CHECK-NEXT: vfwcvt.f.xu.v v12, v10 3009; CHECK-NEXT: vnsrl.wi v8, v12, 23 3010; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma 3011; CHECK-NEXT: vnsrl.wi v10, v8, 0 3012; CHECK-NEXT: li a0, 127 3013; CHECK-NEXT: vsub.vx v8, v10, a0 3014; CHECK-NEXT: ret 3015; 3016; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i8_unmasked: 3017; CHECK-ZVBB: # %bb.0: 3018; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 3019; CHECK-ZVBB-NEXT: vctz.v v8, v8 3020; CHECK-ZVBB-NEXT: ret 3021 %v = call <vscale x 8 x i8> @llvm.vp.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl) 3022 ret <vscale x 8 x i8> %v 3023} 3024 3025 3026define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 3027; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8: 3028; CHECK: # %bb.0: 3029; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 3030; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t 3031; CHECK-NEXT: li a0, 127 3032; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 3033; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 3034; CHECK-NEXT: vzext.vf2 v12, v8, v0.t 3035; CHECK-NEXT: vfwcvt.f.xu.v v16, v12, v0.t 3036; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 3037; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t 3038; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 3039; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t 3040; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma 3041; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t 3042; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 3043; CHECK-NEXT: ret 3044; 3045; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8: 3046; CHECK-ZVBB: # %bb.0: 3047; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 3048; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3049; CHECK-ZVBB-NEXT: ret 3050 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl) 3051 ret <vscale x 16 x i8> %v 3052} 3053 3054define <vscale x 16 x i8> @vp_cttz_zero_undef_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { 3055; CHECK-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked: 3056; CHECK: # %bb.0: 3057; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 3058; CHECK-NEXT: vrsub.vi v10, v8, 0 3059; CHECK-NEXT: vand.vv v8, v8, v10 3060; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 3061; CHECK-NEXT: vzext.vf2 v12, v8 3062; CHECK-NEXT: vfwcvt.f.xu.v v16, v12 3063; CHECK-NEXT: vnsrl.wi v8, v16, 23 3064; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma 3065; CHECK-NEXT: vnsrl.wi v12, v8, 0 3066; CHECK-NEXT: li a0, 127 3067; CHECK-NEXT: vsub.vx v8, v12, a0 3068; CHECK-NEXT: ret 3069; 3070; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i8_unmasked: 3071; CHECK-ZVBB: # %bb.0: 3072; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 3073; CHECK-ZVBB-NEXT: vctz.v v8, v8 3074; CHECK-ZVBB-NEXT: ret 3075 %v = call <vscale x 16 x i8> @llvm.vp.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl) 3076 ret <vscale x 16 x i8> %v 3077} 3078 3079 3080define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 3081; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8: 3082; CHECK: # %bb.0: 3083; CHECK-NEXT: li a1, 1 3084; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 3085; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 3086; CHECK-NEXT: li a0, 85 3087; CHECK-NEXT: vnot.v v8, v8, v0.t 3088; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 3089; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 3090; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 3091; CHECK-NEXT: li a0, 51 3092; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 3093; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 3094; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 3095; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3096; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 3097; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 3098; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 3099; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 3100; CHECK-NEXT: ret 3101; 3102; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8: 3103; CHECK-ZVBB: # %bb.0: 3104; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 3105; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3106; CHECK-ZVBB-NEXT: ret 3107 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl) 3108 ret <vscale x 32 x i8> %v 3109} 3110 3111define <vscale x 32 x i8> @vp_cttz_zero_undef_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { 3112; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked: 3113; CHECK: # %bb.0: 3114; CHECK-NEXT: li a1, 1 3115; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 3116; CHECK-NEXT: vnot.v v12, v8 3117; CHECK-NEXT: vsub.vx v8, v8, a1 3118; CHECK-NEXT: li a0, 85 3119; CHECK-NEXT: vand.vv v8, v12, v8 3120; CHECK-NEXT: vsrl.vi v12, v8, 1 3121; CHECK-NEXT: vand.vx v12, v12, a0 3122; CHECK-NEXT: li a0, 51 3123; CHECK-NEXT: vsub.vv v8, v8, v12 3124; CHECK-NEXT: vand.vx v12, v8, a0 3125; CHECK-NEXT: vsrl.vi v8, v8, 2 3126; CHECK-NEXT: vand.vx v8, v8, a0 3127; CHECK-NEXT: vadd.vv v8, v12, v8 3128; CHECK-NEXT: vsrl.vi v12, v8, 4 3129; CHECK-NEXT: vadd.vv v8, v8, v12 3130; CHECK-NEXT: vand.vi v8, v8, 15 3131; CHECK-NEXT: ret 3132; 3133; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked: 3134; CHECK-ZVBB: # %bb.0: 3135; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 3136; CHECK-ZVBB-NEXT: vctz.v v8, v8 3137; CHECK-ZVBB-NEXT: ret 3138 %v = call <vscale x 32 x i8> @llvm.vp.cttz.nxv32i8(<vscale x 32 x i8> %va, i1 true, <vscale x 32 x i1> splat (i1 true), i32 %evl) 3139 ret <vscale x 32 x i8> %v 3140} 3141 3142 3143define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { 3144; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8: 3145; CHECK: # %bb.0: 3146; CHECK-NEXT: li a1, 1 3147; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 3148; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t 3149; CHECK-NEXT: li a0, 85 3150; CHECK-NEXT: vnot.v v8, v8, v0.t 3151; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 3152; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 3153; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 3154; CHECK-NEXT: li a0, 51 3155; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t 3156; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 3157; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 3158; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3159; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t 3160; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 3161; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 3162; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 3163; CHECK-NEXT: ret 3164; 3165; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8: 3166; CHECK-ZVBB: # %bb.0: 3167; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 3168; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3169; CHECK-ZVBB-NEXT: ret 3170 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> %m, i32 %evl) 3171 ret <vscale x 64 x i8> %v 3172} 3173 3174define <vscale x 64 x i8> @vp_cttz_zero_undef_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { 3175; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked: 3176; CHECK: # %bb.0: 3177; CHECK-NEXT: li a1, 1 3178; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 3179; CHECK-NEXT: vnot.v v16, v8 3180; CHECK-NEXT: vsub.vx v8, v8, a1 3181; CHECK-NEXT: li a0, 85 3182; CHECK-NEXT: vand.vv v8, v16, v8 3183; CHECK-NEXT: vsrl.vi v16, v8, 1 3184; CHECK-NEXT: vand.vx v16, v16, a0 3185; CHECK-NEXT: li a0, 51 3186; CHECK-NEXT: vsub.vv v8, v8, v16 3187; CHECK-NEXT: vand.vx v16, v8, a0 3188; CHECK-NEXT: vsrl.vi v8, v8, 2 3189; CHECK-NEXT: vand.vx v8, v8, a0 3190; CHECK-NEXT: vadd.vv v8, v16, v8 3191; CHECK-NEXT: vsrl.vi v16, v8, 4 3192; CHECK-NEXT: vadd.vv v8, v8, v16 3193; CHECK-NEXT: vand.vi v8, v8, 15 3194; CHECK-NEXT: ret 3195; 3196; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked: 3197; CHECK-ZVBB: # %bb.0: 3198; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 3199; CHECK-ZVBB-NEXT: vctz.v v8, v8 3200; CHECK-ZVBB-NEXT: ret 3201 %v = call <vscale x 64 x i8> @llvm.vp.cttz.nxv64i8(<vscale x 64 x i8> %va, i1 true, <vscale x 64 x i1> splat (i1 true), i32 %evl) 3202 ret <vscale x 64 x i8> %v 3203} 3204 3205 3206define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 3207; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16: 3208; CHECK: # %bb.0: 3209; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3210; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3211; CHECK-NEXT: li a0, 127 3212; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3213; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t 3214; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 3215; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t 3216; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 3217; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 3218; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 3219; CHECK-NEXT: ret 3220; 3221; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16: 3222; CHECK-ZVBB: # %bb.0: 3223; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3224; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3225; CHECK-ZVBB-NEXT: ret 3226 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl) 3227 ret <vscale x 1 x i16> %v 3228} 3229 3230define <vscale x 1 x i16> @vp_cttz_zero_undef_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { 3231; CHECK-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked: 3232; CHECK: # %bb.0: 3233; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3234; CHECK-NEXT: vrsub.vi v9, v8, 0 3235; CHECK-NEXT: vand.vv v8, v8, v9 3236; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 3237; CHECK-NEXT: vnsrl.wi v8, v9, 23 3238; CHECK-NEXT: li a0, 127 3239; CHECK-NEXT: vsub.vx v8, v8, a0 3240; CHECK-NEXT: ret 3241; 3242; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i16_unmasked: 3243; CHECK-ZVBB: # %bb.0: 3244; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3245; CHECK-ZVBB-NEXT: vctz.v v8, v8 3246; CHECK-ZVBB-NEXT: ret 3247 %v = call <vscale x 1 x i16> @llvm.vp.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl) 3248 ret <vscale x 1 x i16> %v 3249} 3250 3251 3252define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 3253; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16: 3254; CHECK: # %bb.0: 3255; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3256; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3257; CHECK-NEXT: li a0, 127 3258; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3259; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t 3260; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3261; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t 3262; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma 3263; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 3264; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 3265; CHECK-NEXT: ret 3266; 3267; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16: 3268; CHECK-ZVBB: # %bb.0: 3269; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3270; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3271; CHECK-ZVBB-NEXT: ret 3272 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl) 3273 ret <vscale x 2 x i16> %v 3274} 3275 3276define <vscale x 2 x i16> @vp_cttz_zero_undef_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { 3277; CHECK-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: 3278; CHECK: # %bb.0: 3279; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3280; CHECK-NEXT: vrsub.vi v9, v8, 0 3281; CHECK-NEXT: vand.vv v8, v8, v9 3282; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 3283; CHECK-NEXT: vnsrl.wi v8, v9, 23 3284; CHECK-NEXT: li a0, 127 3285; CHECK-NEXT: vsub.vx v8, v8, a0 3286; CHECK-NEXT: ret 3287; 3288; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i16_unmasked: 3289; CHECK-ZVBB: # %bb.0: 3290; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3291; CHECK-ZVBB-NEXT: vctz.v v8, v8 3292; CHECK-ZVBB-NEXT: ret 3293 %v = call <vscale x 2 x i16> @llvm.vp.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl) 3294 ret <vscale x 2 x i16> %v 3295} 3296 3297 3298define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 3299; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16: 3300; CHECK: # %bb.0: 3301; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3302; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3303; CHECK-NEXT: li a0, 127 3304; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3305; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t 3306; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3307; CHECK-NEXT: vsrl.vi v8, v10, 23, v0.t 3308; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3309; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t 3310; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t 3311; CHECK-NEXT: ret 3312; 3313; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16: 3314; CHECK-ZVBB: # %bb.0: 3315; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3316; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3317; CHECK-ZVBB-NEXT: ret 3318 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl) 3319 ret <vscale x 4 x i16> %v 3320} 3321 3322define <vscale x 4 x i16> @vp_cttz_zero_undef_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { 3323; CHECK-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: 3324; CHECK: # %bb.0: 3325; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3326; CHECK-NEXT: vrsub.vi v9, v8, 0 3327; CHECK-NEXT: vand.vv v8, v8, v9 3328; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 3329; CHECK-NEXT: vnsrl.wi v8, v10, 23 3330; CHECK-NEXT: li a0, 127 3331; CHECK-NEXT: vsub.vx v8, v8, a0 3332; CHECK-NEXT: ret 3333; 3334; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i16_unmasked: 3335; CHECK-ZVBB: # %bb.0: 3336; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3337; CHECK-ZVBB-NEXT: vctz.v v8, v8 3338; CHECK-ZVBB-NEXT: ret 3339 %v = call <vscale x 4 x i16> @llvm.vp.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl) 3340 ret <vscale x 4 x i16> %v 3341} 3342 3343 3344define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 3345; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16: 3346; CHECK: # %bb.0: 3347; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 3348; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t 3349; CHECK-NEXT: li a0, 127 3350; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 3351; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t 3352; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3353; CHECK-NEXT: vsrl.vi v8, v12, 23, v0.t 3354; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 3355; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t 3356; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t 3357; CHECK-NEXT: ret 3358; 3359; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16: 3360; CHECK-ZVBB: # %bb.0: 3361; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 3362; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3363; CHECK-ZVBB-NEXT: ret 3364 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl) 3365 ret <vscale x 8 x i16> %v 3366} 3367 3368define <vscale x 8 x i16> @vp_cttz_zero_undef_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { 3369; CHECK-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: 3370; CHECK: # %bb.0: 3371; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 3372; CHECK-NEXT: vrsub.vi v10, v8, 0 3373; CHECK-NEXT: vand.vv v8, v8, v10 3374; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 3375; CHECK-NEXT: vnsrl.wi v8, v12, 23 3376; CHECK-NEXT: li a0, 127 3377; CHECK-NEXT: vsub.vx v8, v8, a0 3378; CHECK-NEXT: ret 3379; 3380; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i16_unmasked: 3381; CHECK-ZVBB: # %bb.0: 3382; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 3383; CHECK-ZVBB-NEXT: vctz.v v8, v8 3384; CHECK-ZVBB-NEXT: ret 3385 %v = call <vscale x 8 x i16> @llvm.vp.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl) 3386 ret <vscale x 8 x i16> %v 3387} 3388 3389 3390define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 3391; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16: 3392; CHECK: # %bb.0: 3393; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 3394; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t 3395; CHECK-NEXT: li a0, 127 3396; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 3397; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t 3398; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma 3399; CHECK-NEXT: vsrl.vi v8, v16, 23, v0.t 3400; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma 3401; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t 3402; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t 3403; CHECK-NEXT: ret 3404; 3405; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16: 3406; CHECK-ZVBB: # %bb.0: 3407; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 3408; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3409; CHECK-ZVBB-NEXT: ret 3410 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl) 3411 ret <vscale x 16 x i16> %v 3412} 3413 3414define <vscale x 16 x i16> @vp_cttz_zero_undef_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { 3415; CHECK-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: 3416; CHECK: # %bb.0: 3417; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 3418; CHECK-NEXT: vrsub.vi v12, v8, 0 3419; CHECK-NEXT: vand.vv v8, v8, v12 3420; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 3421; CHECK-NEXT: vnsrl.wi v8, v16, 23 3422; CHECK-NEXT: li a0, 127 3423; CHECK-NEXT: vsub.vx v8, v8, a0 3424; CHECK-NEXT: ret 3425; 3426; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i16_unmasked: 3427; CHECK-ZVBB: # %bb.0: 3428; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 3429; CHECK-ZVBB-NEXT: vctz.v v8, v8 3430; CHECK-ZVBB-NEXT: ret 3431 %v = call <vscale x 16 x i16> @llvm.vp.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl) 3432 ret <vscale x 16 x i16> %v 3433} 3434 3435 3436define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 3437; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16: 3438; CHECK: # %bb.0: 3439; CHECK-NEXT: li a1, 1 3440; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3441; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t 3442; CHECK-NEXT: lui a0, 5 3443; CHECK-NEXT: vnot.v v8, v8, v0.t 3444; CHECK-NEXT: addi a0, a0, 1365 3445; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 3446; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 3447; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 3448; CHECK-NEXT: lui a0, 3 3449; CHECK-NEXT: addi a0, a0, 819 3450; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t 3451; CHECK-NEXT: vand.vx v8, v16, a0, v0.t 3452; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t 3453; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 3454; CHECK-NEXT: lui a0, 1 3455; CHECK-NEXT: addi a0, a0, -241 3456; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 3457; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 3458; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 3459; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3460; CHECK-NEXT: li a0, 257 3461; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 3462; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 3463; CHECK-NEXT: ret 3464; 3465; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16: 3466; CHECK-ZVBB: # %bb.0: 3467; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3468; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3469; CHECK-ZVBB-NEXT: ret 3470 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> %m, i32 %evl) 3471 ret <vscale x 32 x i16> %v 3472} 3473 3474define <vscale x 32 x i16> @vp_cttz_zero_undef_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { 3475; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: 3476; CHECK: # %bb.0: 3477; CHECK-NEXT: li a1, 1 3478; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3479; CHECK-NEXT: vnot.v v16, v8 3480; CHECK-NEXT: vsub.vx v8, v8, a1 3481; CHECK-NEXT: lui a0, 5 3482; CHECK-NEXT: addi a0, a0, 1365 3483; CHECK-NEXT: vand.vv v8, v16, v8 3484; CHECK-NEXT: vsrl.vi v16, v8, 1 3485; CHECK-NEXT: vand.vx v16, v16, a0 3486; CHECK-NEXT: lui a0, 3 3487; CHECK-NEXT: addi a0, a0, 819 3488; CHECK-NEXT: vsub.vv v8, v8, v16 3489; CHECK-NEXT: vand.vx v16, v8, a0 3490; CHECK-NEXT: vsrl.vi v8, v8, 2 3491; CHECK-NEXT: vand.vx v8, v8, a0 3492; CHECK-NEXT: lui a0, 1 3493; CHECK-NEXT: addi a0, a0, -241 3494; CHECK-NEXT: vadd.vv v8, v16, v8 3495; CHECK-NEXT: vsrl.vi v16, v8, 4 3496; CHECK-NEXT: vadd.vv v8, v8, v16 3497; CHECK-NEXT: vand.vx v8, v8, a0 3498; CHECK-NEXT: li a0, 257 3499; CHECK-NEXT: vmul.vx v8, v8, a0 3500; CHECK-NEXT: vsrl.vi v8, v8, 8 3501; CHECK-NEXT: ret 3502; 3503; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: 3504; CHECK-ZVBB: # %bb.0: 3505; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 3506; CHECK-ZVBB-NEXT: vctz.v v8, v8 3507; CHECK-ZVBB-NEXT: ret 3508 %v = call <vscale x 32 x i16> @llvm.vp.cttz.nxv32i16(<vscale x 32 x i16> %va, i1 true, <vscale x 32 x i1> splat (i1 true), i32 %evl) 3509 ret <vscale x 32 x i16> %v 3510} 3511 3512 3513define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 3514; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32: 3515; CHECK: # %bb.0: 3516; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 3517; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3518; CHECK-NEXT: li a0, 52 3519; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3520; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t 3521; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma 3522; CHECK-NEXT: vsrl.vx v8, v9, a0, v0.t 3523; CHECK-NEXT: li a0, 1023 3524; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 3525; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 3526; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 3527; CHECK-NEXT: ret 3528; 3529; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32: 3530; CHECK-ZVBB: # %bb.0: 3531; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 3532; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3533; CHECK-ZVBB-NEXT: ret 3534 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl) 3535 ret <vscale x 1 x i32> %v 3536} 3537 3538define <vscale x 1 x i32> @vp_cttz_zero_undef_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { 3539; CHECK-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: 3540; CHECK: # %bb.0: 3541; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 3542; CHECK-NEXT: vrsub.vi v9, v8, 0 3543; CHECK-NEXT: li a0, 52 3544; CHECK-NEXT: vand.vv v8, v8, v9 3545; CHECK-NEXT: vfwcvt.f.xu.v v9, v8 3546; CHECK-NEXT: vnsrl.wx v8, v9, a0 3547; CHECK-NEXT: li a0, 1023 3548; CHECK-NEXT: vsub.vx v8, v8, a0 3549; CHECK-NEXT: ret 3550; 3551; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i32_unmasked: 3552; CHECK-ZVBB: # %bb.0: 3553; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 3554; CHECK-ZVBB-NEXT: vctz.v v8, v8 3555; CHECK-ZVBB-NEXT: ret 3556 %v = call <vscale x 1 x i32> @llvm.vp.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl) 3557 ret <vscale x 1 x i32> %v 3558} 3559 3560 3561define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 3562; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32: 3563; CHECK: # %bb.0: 3564; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 3565; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3566; CHECK-NEXT: li a0, 52 3567; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3568; CHECK-NEXT: vfwcvt.f.xu.v v10, v8, v0.t 3569; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma 3570; CHECK-NEXT: vsrl.vx v8, v10, a0, v0.t 3571; CHECK-NEXT: li a0, 1023 3572; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma 3573; CHECK-NEXT: vnsrl.wi v10, v8, 0, v0.t 3574; CHECK-NEXT: vsub.vx v8, v10, a0, v0.t 3575; CHECK-NEXT: ret 3576; 3577; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32: 3578; CHECK-ZVBB: # %bb.0: 3579; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 3580; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3581; CHECK-ZVBB-NEXT: ret 3582 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl) 3583 ret <vscale x 2 x i32> %v 3584} 3585 3586define <vscale x 2 x i32> @vp_cttz_zero_undef_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { 3587; CHECK-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: 3588; CHECK: # %bb.0: 3589; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 3590; CHECK-NEXT: vrsub.vi v9, v8, 0 3591; CHECK-NEXT: li a0, 52 3592; CHECK-NEXT: vand.vv v8, v8, v9 3593; CHECK-NEXT: vfwcvt.f.xu.v v10, v8 3594; CHECK-NEXT: vnsrl.wx v8, v10, a0 3595; CHECK-NEXT: li a0, 1023 3596; CHECK-NEXT: vsub.vx v8, v8, a0 3597; CHECK-NEXT: ret 3598; 3599; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i32_unmasked: 3600; CHECK-ZVBB: # %bb.0: 3601; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 3602; CHECK-ZVBB-NEXT: vctz.v v8, v8 3603; CHECK-ZVBB-NEXT: ret 3604 %v = call <vscale x 2 x i32> @llvm.vp.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl) 3605 ret <vscale x 2 x i32> %v 3606} 3607 3608 3609define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 3610; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32: 3611; CHECK: # %bb.0: 3612; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 3613; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t 3614; CHECK-NEXT: li a0, 52 3615; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 3616; CHECK-NEXT: vfwcvt.f.xu.v v12, v8, v0.t 3617; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma 3618; CHECK-NEXT: vsrl.vx v8, v12, a0, v0.t 3619; CHECK-NEXT: li a0, 1023 3620; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3621; CHECK-NEXT: vnsrl.wi v12, v8, 0, v0.t 3622; CHECK-NEXT: vsub.vx v8, v12, a0, v0.t 3623; CHECK-NEXT: ret 3624; 3625; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32: 3626; CHECK-ZVBB: # %bb.0: 3627; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 3628; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3629; CHECK-ZVBB-NEXT: ret 3630 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl) 3631 ret <vscale x 4 x i32> %v 3632} 3633 3634define <vscale x 4 x i32> @vp_cttz_zero_undef_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { 3635; CHECK-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: 3636; CHECK: # %bb.0: 3637; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 3638; CHECK-NEXT: vrsub.vi v10, v8, 0 3639; CHECK-NEXT: li a0, 52 3640; CHECK-NEXT: vand.vv v8, v8, v10 3641; CHECK-NEXT: vfwcvt.f.xu.v v12, v8 3642; CHECK-NEXT: vnsrl.wx v8, v12, a0 3643; CHECK-NEXT: li a0, 1023 3644; CHECK-NEXT: vsub.vx v8, v8, a0 3645; CHECK-NEXT: ret 3646; 3647; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i32_unmasked: 3648; CHECK-ZVBB: # %bb.0: 3649; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 3650; CHECK-ZVBB-NEXT: vctz.v v8, v8 3651; CHECK-ZVBB-NEXT: ret 3652 %v = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl) 3653 ret <vscale x 4 x i32> %v 3654} 3655 3656 3657define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 3658; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32: 3659; CHECK: # %bb.0: 3660; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 3661; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t 3662; CHECK-NEXT: li a0, 52 3663; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 3664; CHECK-NEXT: vfwcvt.f.xu.v v16, v8, v0.t 3665; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma 3666; CHECK-NEXT: vsrl.vx v8, v16, a0, v0.t 3667; CHECK-NEXT: li a0, 1023 3668; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 3669; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t 3670; CHECK-NEXT: vsub.vx v8, v16, a0, v0.t 3671; CHECK-NEXT: ret 3672; 3673; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32: 3674; CHECK-ZVBB: # %bb.0: 3675; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 3676; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3677; CHECK-ZVBB-NEXT: ret 3678 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl) 3679 ret <vscale x 8 x i32> %v 3680} 3681 3682define <vscale x 8 x i32> @vp_cttz_zero_undef_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { 3683; CHECK-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: 3684; CHECK: # %bb.0: 3685; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 3686; CHECK-NEXT: vrsub.vi v12, v8, 0 3687; CHECK-NEXT: li a0, 52 3688; CHECK-NEXT: vand.vv v8, v8, v12 3689; CHECK-NEXT: vfwcvt.f.xu.v v16, v8 3690; CHECK-NEXT: vnsrl.wx v8, v16, a0 3691; CHECK-NEXT: li a0, 1023 3692; CHECK-NEXT: vsub.vx v8, v8, a0 3693; CHECK-NEXT: ret 3694; 3695; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i32_unmasked: 3696; CHECK-ZVBB: # %bb.0: 3697; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 3698; CHECK-ZVBB-NEXT: vctz.v v8, v8 3699; CHECK-ZVBB-NEXT: ret 3700 %v = call <vscale x 8 x i32> @llvm.vp.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl) 3701 ret <vscale x 8 x i32> %v 3702} 3703 3704 3705define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 3706; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32: 3707; CHECK: # %bb.0: 3708; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 3709; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t 3710; CHECK-NEXT: fsrmi a0, 1 3711; CHECK-NEXT: li a1, 127 3712; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 3713; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3714; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t 3715; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3716; CHECK-NEXT: fsrm a0 3717; CHECK-NEXT: ret 3718; 3719; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32: 3720; CHECK-ZVBB: # %bb.0: 3721; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 3722; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3723; CHECK-ZVBB-NEXT: ret 3724 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl) 3725 ret <vscale x 16 x i32> %v 3726} 3727 3728define <vscale x 16 x i32> @vp_cttz_zero_undef_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { 3729; CHECK-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: 3730; CHECK: # %bb.0: 3731; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 3732; CHECK-NEXT: vrsub.vi v16, v8, 0 3733; CHECK-NEXT: fsrmi a0, 1 3734; CHECK-NEXT: vand.vv v8, v8, v16 3735; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3736; CHECK-NEXT: vsrl.vi v8, v8, 23 3737; CHECK-NEXT: li a1, 127 3738; CHECK-NEXT: vsub.vx v8, v8, a1 3739; CHECK-NEXT: fsrm a0 3740; CHECK-NEXT: ret 3741; 3742; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32_unmasked: 3743; CHECK-ZVBB: # %bb.0: 3744; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 3745; CHECK-ZVBB-NEXT: vctz.v v8, v8 3746; CHECK-ZVBB-NEXT: ret 3747 %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl) 3748 ret <vscale x 16 x i32> %v 3749} 3750 3751 3752define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 3753; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64: 3754; CHECK: # %bb.0: 3755; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3756; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 3757; CHECK-NEXT: fsrmi a0, 1 3758; CHECK-NEXT: li a1, 52 3759; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 3760; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3761; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t 3762; CHECK-NEXT: li a1, 1023 3763; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3764; CHECK-NEXT: fsrm a0 3765; CHECK-NEXT: ret 3766; 3767; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64: 3768; CHECK-ZVBB: # %bb.0: 3769; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3770; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3771; CHECK-ZVBB-NEXT: ret 3772 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl) 3773 ret <vscale x 1 x i64> %v 3774} 3775 3776define <vscale x 1 x i64> @vp_cttz_zero_undef_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { 3777; CHECK-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: 3778; CHECK: # %bb.0: 3779; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3780; CHECK-NEXT: vrsub.vi v9, v8, 0 3781; CHECK-NEXT: fsrmi a0, 1 3782; CHECK-NEXT: li a1, 52 3783; CHECK-NEXT: vand.vv v8, v8, v9 3784; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3785; CHECK-NEXT: vsrl.vx v8, v8, a1 3786; CHECK-NEXT: li a1, 1023 3787; CHECK-NEXT: vsub.vx v8, v8, a1 3788; CHECK-NEXT: fsrm a0 3789; CHECK-NEXT: ret 3790; 3791; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv1i64_unmasked: 3792; CHECK-ZVBB: # %bb.0: 3793; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3794; CHECK-ZVBB-NEXT: vctz.v v8, v8 3795; CHECK-ZVBB-NEXT: ret 3796 %v = call <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64> %va, i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl) 3797 ret <vscale x 1 x i64> %v 3798} 3799 3800 3801define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 3802; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64: 3803; CHECK: # %bb.0: 3804; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3805; CHECK-NEXT: vrsub.vi v10, v8, 0, v0.t 3806; CHECK-NEXT: fsrmi a0, 1 3807; CHECK-NEXT: li a1, 52 3808; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 3809; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3810; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t 3811; CHECK-NEXT: li a1, 1023 3812; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3813; CHECK-NEXT: fsrm a0 3814; CHECK-NEXT: ret 3815; 3816; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64: 3817; CHECK-ZVBB: # %bb.0: 3818; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3819; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3820; CHECK-ZVBB-NEXT: ret 3821 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> %m, i32 %evl) 3822 ret <vscale x 2 x i64> %v 3823} 3824 3825define <vscale x 2 x i64> @vp_cttz_zero_undef_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { 3826; CHECK-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: 3827; CHECK: # %bb.0: 3828; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3829; CHECK-NEXT: vrsub.vi v10, v8, 0 3830; CHECK-NEXT: fsrmi a0, 1 3831; CHECK-NEXT: li a1, 52 3832; CHECK-NEXT: vand.vv v8, v8, v10 3833; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3834; CHECK-NEXT: vsrl.vx v8, v8, a1 3835; CHECK-NEXT: li a1, 1023 3836; CHECK-NEXT: vsub.vx v8, v8, a1 3837; CHECK-NEXT: fsrm a0 3838; CHECK-NEXT: ret 3839; 3840; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv2i64_unmasked: 3841; CHECK-ZVBB: # %bb.0: 3842; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3843; CHECK-ZVBB-NEXT: vctz.v v8, v8 3844; CHECK-ZVBB-NEXT: ret 3845 %v = call <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64> %va, i1 true, <vscale x 2 x i1> splat (i1 true), i32 %evl) 3846 ret <vscale x 2 x i64> %v 3847} 3848 3849 3850define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 3851; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64: 3852; CHECK: # %bb.0: 3853; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3854; CHECK-NEXT: vrsub.vi v12, v8, 0, v0.t 3855; CHECK-NEXT: fsrmi a0, 1 3856; CHECK-NEXT: li a1, 52 3857; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 3858; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3859; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t 3860; CHECK-NEXT: li a1, 1023 3861; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3862; CHECK-NEXT: fsrm a0 3863; CHECK-NEXT: ret 3864; 3865; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64: 3866; CHECK-ZVBB: # %bb.0: 3867; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3868; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3869; CHECK-ZVBB-NEXT: ret 3870 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> %m, i32 %evl) 3871 ret <vscale x 4 x i64> %v 3872} 3873 3874define <vscale x 4 x i64> @vp_cttz_zero_undef_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { 3875; CHECK-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: 3876; CHECK: # %bb.0: 3877; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3878; CHECK-NEXT: vrsub.vi v12, v8, 0 3879; CHECK-NEXT: fsrmi a0, 1 3880; CHECK-NEXT: li a1, 52 3881; CHECK-NEXT: vand.vv v8, v8, v12 3882; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3883; CHECK-NEXT: vsrl.vx v8, v8, a1 3884; CHECK-NEXT: li a1, 1023 3885; CHECK-NEXT: vsub.vx v8, v8, a1 3886; CHECK-NEXT: fsrm a0 3887; CHECK-NEXT: ret 3888; 3889; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv4i64_unmasked: 3890; CHECK-ZVBB: # %bb.0: 3891; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3892; CHECK-ZVBB-NEXT: vctz.v v8, v8 3893; CHECK-ZVBB-NEXT: ret 3894 %v = call <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64> %va, i1 true, <vscale x 4 x i1> splat (i1 true), i32 %evl) 3895 ret <vscale x 4 x i64> %v 3896} 3897 3898 3899define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 3900; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64: 3901; CHECK: # %bb.0: 3902; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3903; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t 3904; CHECK-NEXT: fsrmi a0, 1 3905; CHECK-NEXT: li a1, 52 3906; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 3907; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3908; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t 3909; CHECK-NEXT: li a1, 1023 3910; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3911; CHECK-NEXT: fsrm a0 3912; CHECK-NEXT: ret 3913; 3914; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64: 3915; CHECK-ZVBB: # %bb.0: 3916; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3917; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3918; CHECK-ZVBB-NEXT: ret 3919 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> %m, i32 %evl) 3920 ret <vscale x 7 x i64> %v 3921} 3922 3923define <vscale x 7 x i64> @vp_cttz_zero_undef_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { 3924; CHECK-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked: 3925; CHECK: # %bb.0: 3926; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3927; CHECK-NEXT: vrsub.vi v16, v8, 0 3928; CHECK-NEXT: fsrmi a0, 1 3929; CHECK-NEXT: li a1, 52 3930; CHECK-NEXT: vand.vv v8, v8, v16 3931; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3932; CHECK-NEXT: vsrl.vx v8, v8, a1 3933; CHECK-NEXT: li a1, 1023 3934; CHECK-NEXT: vsub.vx v8, v8, a1 3935; CHECK-NEXT: fsrm a0 3936; CHECK-NEXT: ret 3937; 3938; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv7i64_unmasked: 3939; CHECK-ZVBB: # %bb.0: 3940; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3941; CHECK-ZVBB-NEXT: vctz.v v8, v8 3942; CHECK-ZVBB-NEXT: ret 3943 %v = call <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64> %va, i1 true, <vscale x 7 x i1> splat (i1 true), i32 %evl) 3944 ret <vscale x 7 x i64> %v 3945} 3946 3947 3948define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 3949; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64: 3950; CHECK: # %bb.0: 3951; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3952; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t 3953; CHECK-NEXT: fsrmi a0, 1 3954; CHECK-NEXT: li a1, 52 3955; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 3956; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 3957; CHECK-NEXT: vsrl.vx v8, v8, a1, v0.t 3958; CHECK-NEXT: li a1, 1023 3959; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t 3960; CHECK-NEXT: fsrm a0 3961; CHECK-NEXT: ret 3962; 3963; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64: 3964; CHECK-ZVBB: # %bb.0: 3965; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3966; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 3967; CHECK-ZVBB-NEXT: ret 3968 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> %m, i32 %evl) 3969 ret <vscale x 8 x i64> %v 3970} 3971 3972define <vscale x 8 x i64> @vp_cttz_zero_undef_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { 3973; CHECK-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked: 3974; CHECK: # %bb.0: 3975; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3976; CHECK-NEXT: vrsub.vi v16, v8, 0 3977; CHECK-NEXT: fsrmi a0, 1 3978; CHECK-NEXT: li a1, 52 3979; CHECK-NEXT: vand.vv v8, v8, v16 3980; CHECK-NEXT: vfcvt.f.xu.v v8, v8 3981; CHECK-NEXT: vsrl.vx v8, v8, a1 3982; CHECK-NEXT: li a1, 1023 3983; CHECK-NEXT: vsub.vx v8, v8, a1 3984; CHECK-NEXT: fsrm a0 3985; CHECK-NEXT: ret 3986; 3987; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv8i64_unmasked: 3988; CHECK-ZVBB: # %bb.0: 3989; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3990; CHECK-ZVBB-NEXT: vctz.v v8, v8 3991; CHECK-ZVBB-NEXT: ret 3992 %v = call <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64> %va, i1 true, <vscale x 8 x i1> splat (i1 true), i32 %evl) 3993 ret <vscale x 8 x i64> %v 3994} 3995 3996define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 3997; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64: 3998; CHECK: # %bb.0: 3999; CHECK-NEXT: addi sp, sp, -16 4000; CHECK-NEXT: .cfi_def_cfa_offset 16 4001; CHECK-NEXT: csrr a1, vlenb 4002; CHECK-NEXT: slli a1, a1, 4 4003; CHECK-NEXT: sub sp, sp, a1 4004; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 4005; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 4006; CHECK-NEXT: vmv1r.v v24, v0 4007; CHECK-NEXT: csrr a1, vlenb 4008; CHECK-NEXT: slli a1, a1, 3 4009; CHECK-NEXT: add a1, sp, a1 4010; CHECK-NEXT: addi a1, a1, 16 4011; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 4012; CHECK-NEXT: csrr a1, vlenb 4013; CHECK-NEXT: fsrmi a3, 1 4014; CHECK-NEXT: srli a2, a1, 3 4015; CHECK-NEXT: sub a4, a0, a1 4016; CHECK-NEXT: vslidedown.vx v0, v0, a2 4017; CHECK-NEXT: sltu a2, a0, a4 4018; CHECK-NEXT: addi a2, a2, -1 4019; CHECK-NEXT: and a4, a2, a4 4020; CHECK-NEXT: li a2, 52 4021; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma 4022; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t 4023; CHECK-NEXT: vand.vv v8, v16, v8, v0.t 4024; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 4025; CHECK-NEXT: fsrm a3 4026; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t 4027; CHECK-NEXT: li a3, 1023 4028; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t 4029; CHECK-NEXT: addi a4, sp, 16 4030; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 4031; CHECK-NEXT: bltu a0, a1, .LBB94_2 4032; CHECK-NEXT: # %bb.1: 4033; CHECK-NEXT: mv a0, a1 4034; CHECK-NEXT: .LBB94_2: 4035; CHECK-NEXT: vmv1r.v v0, v24 4036; CHECK-NEXT: slli a1, a1, 3 4037; CHECK-NEXT: add a1, sp, a1 4038; CHECK-NEXT: addi a1, a1, 16 4039; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 4040; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4041; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t 4042; CHECK-NEXT: fsrmi a0, 1 4043; CHECK-NEXT: vand.vv v8, v8, v16, v0.t 4044; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t 4045; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t 4046; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t 4047; CHECK-NEXT: fsrm a0 4048; CHECK-NEXT: addi a0, sp, 16 4049; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4050; CHECK-NEXT: csrr a0, vlenb 4051; CHECK-NEXT: slli a0, a0, 4 4052; CHECK-NEXT: add sp, sp, a0 4053; CHECK-NEXT: .cfi_def_cfa sp, 16 4054; CHECK-NEXT: addi sp, sp, 16 4055; CHECK-NEXT: .cfi_def_cfa_offset 0 4056; CHECK-NEXT: ret 4057; 4058; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64: 4059; CHECK-ZVBB: # %bb.0: 4060; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 4061; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 4062; CHECK-ZVBB-NEXT: csrr a1, vlenb 4063; CHECK-ZVBB-NEXT: srli a2, a1, 3 4064; CHECK-ZVBB-NEXT: sub a3, a0, a1 4065; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 4066; CHECK-ZVBB-NEXT: sltu a2, a0, a3 4067; CHECK-ZVBB-NEXT: addi a2, a2, -1 4068; CHECK-ZVBB-NEXT: and a2, a2, a3 4069; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 4070; CHECK-ZVBB-NEXT: vctz.v v16, v16, v0.t 4071; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB94_2 4072; CHECK-ZVBB-NEXT: # %bb.1: 4073; CHECK-ZVBB-NEXT: mv a0, a1 4074; CHECK-ZVBB-NEXT: .LBB94_2: 4075; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 4076; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4077; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 4078; CHECK-ZVBB-NEXT: ret 4079 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl) 4080 ret <vscale x 16 x i64> %v 4081} 4082 4083define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { 4084; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked: 4085; CHECK: # %bb.0: 4086; CHECK-NEXT: csrr a1, vlenb 4087; CHECK-NEXT: fsrmi a3, 1 4088; CHECK-NEXT: sub a2, a0, a1 4089; CHECK-NEXT: sltu a4, a0, a2 4090; CHECK-NEXT: addi a4, a4, -1 4091; CHECK-NEXT: and a4, a4, a2 4092; CHECK-NEXT: li a2, 52 4093; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma 4094; CHECK-NEXT: vrsub.vi v24, v16, 0 4095; CHECK-NEXT: vand.vv v16, v16, v24 4096; CHECK-NEXT: vfcvt.f.xu.v v16, v16 4097; CHECK-NEXT: fsrm a3 4098; CHECK-NEXT: vsrl.vx v16, v16, a2 4099; CHECK-NEXT: li a3, 1023 4100; CHECK-NEXT: vsub.vx v16, v16, a3 4101; CHECK-NEXT: bltu a0, a1, .LBB95_2 4102; CHECK-NEXT: # %bb.1: 4103; CHECK-NEXT: mv a0, a1 4104; CHECK-NEXT: .LBB95_2: 4105; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4106; CHECK-NEXT: vrsub.vi v24, v8, 0 4107; CHECK-NEXT: vand.vv v8, v8, v24 4108; CHECK-NEXT: fsrmi a0, 1 4109; CHECK-NEXT: vfcvt.f.xu.v v8, v8 4110; CHECK-NEXT: vsrl.vx v8, v8, a2 4111; CHECK-NEXT: vsub.vx v8, v8, a3 4112; CHECK-NEXT: fsrm a0 4113; CHECK-NEXT: ret 4114; 4115; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked: 4116; CHECK-ZVBB: # %bb.0: 4117; CHECK-ZVBB-NEXT: csrr a1, vlenb 4118; CHECK-ZVBB-NEXT: sub a2, a0, a1 4119; CHECK-ZVBB-NEXT: sltu a3, a0, a2 4120; CHECK-ZVBB-NEXT: addi a3, a3, -1 4121; CHECK-ZVBB-NEXT: and a2, a3, a2 4122; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 4123; CHECK-ZVBB-NEXT: vctz.v v16, v16 4124; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB95_2 4125; CHECK-ZVBB-NEXT: # %bb.1: 4126; CHECK-ZVBB-NEXT: mv a0, a1 4127; CHECK-ZVBB-NEXT: .LBB95_2: 4128; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4129; CHECK-ZVBB-NEXT: vctz.v v8, v8 4130; CHECK-ZVBB-NEXT: ret 4131 %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl) 4132 ret <vscale x 16 x i64> %v 4133} 4134 4135; Test promotion. 4136declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32) 4137define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 4138; CHECK-LABEL: vp_cttz_nxv1i9: 4139; CHECK: # %bb.0: 4140; CHECK-NEXT: li a1, 512 4141; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 4142; CHECK-NEXT: vor.vx v8, v8, a1, v0.t 4143; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 4144; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 4145; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t 4146; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 4147; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t 4148; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 4149; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 4150; CHECK-NEXT: li a0, 127 4151; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 4152; CHECK-NEXT: ret 4153; 4154; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9: 4155; CHECK-ZVBB: # %bb.0: 4156; CHECK-ZVBB-NEXT: li a1, 512 4157; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 4158; CHECK-ZVBB-NEXT: vor.vx v8, v8, a1, v0.t 4159; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 4160; CHECK-ZVBB-NEXT: ret 4161 %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl) 4162 ret <vscale x 1 x i9> %v 4163} 4164define <vscale x 1 x i9> @vp_zero_undef_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 4165; CHECK-LABEL: vp_zero_undef_cttz_nxv1i9: 4166; CHECK: # %bb.0: 4167; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 4168; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t 4169; CHECK-NEXT: li a0, 127 4170; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 4171; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t 4172; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma 4173; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t 4174; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma 4175; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t 4176; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t 4177; CHECK-NEXT: ret 4178; 4179; CHECK-ZVBB-LABEL: vp_zero_undef_cttz_nxv1i9: 4180; CHECK-ZVBB: # %bb.0: 4181; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 4182; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t 4183; CHECK-ZVBB-NEXT: ret 4184 %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl) 4185 ret <vscale x 1 x i9> %v 4186} 4187