1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6 7declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32) 8 9define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { 10; CHECK-LABEL: vp_cttz_v2i8: 11; CHECK: # %bb.0: 12; CHECK-NEXT: li a1, 1 13; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 14; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 15; CHECK-NEXT: li a0, 85 16; CHECK-NEXT: vnot.v v8, v8, v0.t 17; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 18; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 19; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 20; CHECK-NEXT: li a0, 51 21; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 22; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 23; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 24; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 25; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 26; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 27; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 28; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 29; CHECK-NEXT: ret 30 %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl) 31 ret <2 x i8> %v 32} 33 34define <2 x i8> @vp_cttz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { 35; CHECK-LABEL: vp_cttz_v2i8_unmasked: 36; CHECK: # %bb.0: 37; CHECK-NEXT: li a1, 1 38; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 39; CHECK-NEXT: vnot.v v9, v8 40; CHECK-NEXT: vsub.vx v8, v8, a1 41; CHECK-NEXT: li a0, 85 42; CHECK-NEXT: vand.vv v8, v9, v8 43; CHECK-NEXT: vsrl.vi v9, v8, 1 44; CHECK-NEXT: vand.vx v9, v9, a0 45; CHECK-NEXT: li a0, 51 46; CHECK-NEXT: vsub.vv v8, v8, v9 47; CHECK-NEXT: vand.vx v9, v8, a0 48; CHECK-NEXT: vsrl.vi v8, v8, 2 49; CHECK-NEXT: vand.vx v8, v8, a0 50; CHECK-NEXT: vadd.vv v8, v9, v8 51; CHECK-NEXT: vsrl.vi v9, v8, 4 52; CHECK-NEXT: vadd.vv v8, v8, v9 53; CHECK-NEXT: vand.vi v8, v8, 15 54; CHECK-NEXT: ret 55 %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) 56 ret <2 x i8> %v 57} 58 59declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32) 60 61define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { 62; CHECK-LABEL: vp_cttz_v4i8: 63; CHECK: # %bb.0: 64; CHECK-NEXT: li a1, 1 65; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 66; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 67; CHECK-NEXT: li a0, 85 68; CHECK-NEXT: vnot.v v8, v8, v0.t 69; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 70; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 71; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 72; CHECK-NEXT: li a0, 51 73; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 74; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 75; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 76; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 77; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 78; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 79; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 80; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 81; CHECK-NEXT: ret 82 %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl) 83 ret <4 x i8> %v 84} 85 86define <4 x i8> @vp_cttz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { 87; CHECK-LABEL: vp_cttz_v4i8_unmasked: 88; CHECK: # %bb.0: 89; CHECK-NEXT: li a1, 1 90; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 91; CHECK-NEXT: vnot.v v9, v8 92; CHECK-NEXT: vsub.vx v8, v8, a1 93; CHECK-NEXT: li a0, 85 94; CHECK-NEXT: vand.vv v8, v9, v8 95; CHECK-NEXT: vsrl.vi v9, v8, 1 96; CHECK-NEXT: vand.vx v9, v9, a0 97; CHECK-NEXT: li a0, 51 98; CHECK-NEXT: vsub.vv v8, v8, v9 99; CHECK-NEXT: vand.vx v9, v8, a0 100; CHECK-NEXT: vsrl.vi v8, v8, 2 101; CHECK-NEXT: vand.vx v8, v8, a0 102; CHECK-NEXT: vadd.vv v8, v9, v8 103; CHECK-NEXT: vsrl.vi v9, v8, 4 104; CHECK-NEXT: vadd.vv v8, v8, v9 105; CHECK-NEXT: vand.vi v8, v8, 15 106; CHECK-NEXT: ret 107 %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) 108 ret <4 x i8> %v 109} 110 111declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32) 112 113define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { 114; CHECK-LABEL: vp_cttz_v8i8: 115; CHECK: # %bb.0: 116; CHECK-NEXT: li a1, 1 117; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 118; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 119; CHECK-NEXT: li a0, 85 120; CHECK-NEXT: vnot.v v8, v8, v0.t 121; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 122; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 123; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 124; CHECK-NEXT: li a0, 51 125; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 126; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 127; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 128; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 129; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 130; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 131; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 132; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 133; CHECK-NEXT: ret 134 %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl) 135 ret <8 x i8> %v 136} 137 138define <8 x i8> @vp_cttz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { 139; CHECK-LABEL: vp_cttz_v8i8_unmasked: 140; CHECK: # %bb.0: 141; CHECK-NEXT: li a1, 1 142; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 143; CHECK-NEXT: vnot.v v9, v8 144; CHECK-NEXT: vsub.vx v8, v8, a1 145; CHECK-NEXT: li a0, 85 146; CHECK-NEXT: vand.vv v8, v9, v8 147; CHECK-NEXT: vsrl.vi v9, v8, 1 148; CHECK-NEXT: vand.vx v9, v9, a0 149; CHECK-NEXT: li a0, 51 150; CHECK-NEXT: vsub.vv v8, v8, v9 151; CHECK-NEXT: vand.vx v9, v8, a0 152; CHECK-NEXT: vsrl.vi v8, v8, 2 153; CHECK-NEXT: vand.vx v8, v8, a0 154; CHECK-NEXT: vadd.vv v8, v9, v8 155; CHECK-NEXT: vsrl.vi v9, v8, 4 156; CHECK-NEXT: vadd.vv v8, v8, v9 157; CHECK-NEXT: vand.vi v8, v8, 15 158; CHECK-NEXT: ret 159 %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) 160 ret <8 x i8> %v 161} 162 163declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32) 164 165define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { 166; CHECK-LABEL: vp_cttz_v16i8: 167; CHECK: # %bb.0: 168; CHECK-NEXT: li a1, 1 169; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 170; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 171; CHECK-NEXT: li a0, 85 172; CHECK-NEXT: vnot.v v8, v8, v0.t 173; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 174; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 175; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 176; CHECK-NEXT: li a0, 51 177; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 178; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 179; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 180; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 181; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 182; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 183; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 184; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 185; CHECK-NEXT: ret 186 %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl) 187 ret <16 x i8> %v 188} 189 190define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { 191; CHECK-LABEL: vp_cttz_v16i8_unmasked: 192; CHECK: # %bb.0: 193; CHECK-NEXT: li a1, 1 194; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 195; CHECK-NEXT: vnot.v v9, v8 196; CHECK-NEXT: vsub.vx v8, v8, a1 197; CHECK-NEXT: li a0, 85 198; CHECK-NEXT: vand.vv v8, v9, v8 199; CHECK-NEXT: vsrl.vi v9, v8, 1 200; CHECK-NEXT: vand.vx v9, v9, a0 201; CHECK-NEXT: li a0, 51 202; CHECK-NEXT: vsub.vv v8, v8, v9 203; CHECK-NEXT: vand.vx v9, v8, a0 204; CHECK-NEXT: vsrl.vi v8, v8, 2 205; CHECK-NEXT: vand.vx v8, v8, a0 206; CHECK-NEXT: vadd.vv v8, v9, v8 207; CHECK-NEXT: vsrl.vi v9, v8, 4 208; CHECK-NEXT: vadd.vv v8, v8, v9 209; CHECK-NEXT: vand.vi v8, v8, 15 210; CHECK-NEXT: ret 211 %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) 212 ret <16 x i8> %v 213} 214 215declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32) 216 217define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { 218; CHECK-LABEL: vp_cttz_v2i16: 219; CHECK: # %bb.0: 220; CHECK-NEXT: li a1, 1 221; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 222; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 223; CHECK-NEXT: lui a0, 5 224; CHECK-NEXT: vnot.v v8, v8, v0.t 225; CHECK-NEXT: addi a0, a0, 1365 226; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 227; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 228; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 229; CHECK-NEXT: lui a0, 3 230; CHECK-NEXT: addi a0, a0, 819 231; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 232; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 233; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 234; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 235; CHECK-NEXT: lui a0, 1 236; CHECK-NEXT: addi a0, a0, -241 237; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 238; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 239; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 240; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 241; CHECK-NEXT: li a0, 257 242; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 243; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 244; CHECK-NEXT: ret 245 %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) 246 ret <2 x i16> %v 247} 248 249define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { 250; CHECK-LABEL: vp_cttz_v2i16_unmasked: 251; CHECK: # %bb.0: 252; CHECK-NEXT: li a1, 1 253; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 254; CHECK-NEXT: vnot.v v9, v8 255; CHECK-NEXT: vsub.vx v8, v8, a1 256; CHECK-NEXT: lui a0, 5 257; CHECK-NEXT: addi a0, a0, 1365 258; CHECK-NEXT: vand.vv v8, v9, v8 259; CHECK-NEXT: vsrl.vi v9, v8, 1 260; CHECK-NEXT: vand.vx v9, v9, a0 261; CHECK-NEXT: lui a0, 3 262; CHECK-NEXT: addi a0, a0, 819 263; CHECK-NEXT: vsub.vv v8, v8, v9 264; CHECK-NEXT: vand.vx v9, v8, a0 265; CHECK-NEXT: vsrl.vi v8, v8, 2 266; CHECK-NEXT: vand.vx v8, v8, a0 267; CHECK-NEXT: lui a0, 1 268; CHECK-NEXT: addi a0, a0, -241 269; CHECK-NEXT: vadd.vv v8, v9, v8 270; CHECK-NEXT: vsrl.vi v9, v8, 4 271; CHECK-NEXT: vadd.vv v8, v8, v9 272; CHECK-NEXT: vand.vx v8, v8, a0 273; CHECK-NEXT: li a0, 257 274; CHECK-NEXT: vmul.vx v8, v8, a0 275; CHECK-NEXT: vsrl.vi v8, v8, 8 276; CHECK-NEXT: ret 277 %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) 278 ret <2 x i16> %v 279} 280 281declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32) 282 283define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { 284; CHECK-LABEL: vp_cttz_v4i16: 285; CHECK: # %bb.0: 286; CHECK-NEXT: li a1, 1 287; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 288; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 289; CHECK-NEXT: lui a0, 5 290; CHECK-NEXT: vnot.v v8, v8, v0.t 291; CHECK-NEXT: addi a0, a0, 1365 292; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 293; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 294; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 295; CHECK-NEXT: lui a0, 3 296; CHECK-NEXT: addi a0, a0, 819 297; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 298; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 299; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 300; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 301; CHECK-NEXT: lui a0, 1 302; CHECK-NEXT: addi a0, a0, -241 303; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 304; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 305; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 306; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 307; CHECK-NEXT: li a0, 257 308; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 309; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 310; CHECK-NEXT: ret 311 %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) 312 ret <4 x i16> %v 313} 314 315define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { 316; CHECK-LABEL: vp_cttz_v4i16_unmasked: 317; CHECK: # %bb.0: 318; CHECK-NEXT: li a1, 1 319; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 320; CHECK-NEXT: vnot.v v9, v8 321; CHECK-NEXT: vsub.vx v8, v8, a1 322; CHECK-NEXT: lui a0, 5 323; CHECK-NEXT: addi a0, a0, 1365 324; CHECK-NEXT: vand.vv v8, v9, v8 325; CHECK-NEXT: vsrl.vi v9, v8, 1 326; CHECK-NEXT: vand.vx v9, v9, a0 327; CHECK-NEXT: lui a0, 3 328; CHECK-NEXT: addi a0, a0, 819 329; CHECK-NEXT: vsub.vv v8, v8, v9 330; CHECK-NEXT: vand.vx v9, v8, a0 331; CHECK-NEXT: vsrl.vi v8, v8, 2 332; CHECK-NEXT: vand.vx v8, v8, a0 333; CHECK-NEXT: lui a0, 1 334; CHECK-NEXT: addi a0, a0, -241 335; CHECK-NEXT: vadd.vv v8, v9, v8 336; CHECK-NEXT: vsrl.vi v9, v8, 4 337; CHECK-NEXT: vadd.vv v8, v8, v9 338; CHECK-NEXT: vand.vx v8, v8, a0 339; CHECK-NEXT: li a0, 257 340; CHECK-NEXT: vmul.vx v8, v8, a0 341; CHECK-NEXT: vsrl.vi v8, v8, 8 342; CHECK-NEXT: ret 343 %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) 344 ret <4 x i16> %v 345} 346 347declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32) 348 349define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { 350; CHECK-LABEL: vp_cttz_v8i16: 351; CHECK: # %bb.0: 352; CHECK-NEXT: li a1, 1 353; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 354; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 355; CHECK-NEXT: lui a0, 5 356; CHECK-NEXT: vnot.v v8, v8, v0.t 357; CHECK-NEXT: addi a0, a0, 1365 358; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 359; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 360; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 361; CHECK-NEXT: lui a0, 3 362; CHECK-NEXT: addi a0, a0, 819 363; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 364; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 365; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 366; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 367; CHECK-NEXT: lui a0, 1 368; CHECK-NEXT: addi a0, a0, -241 369; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 370; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 371; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 372; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 373; CHECK-NEXT: li a0, 257 374; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 375; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 376; CHECK-NEXT: ret 377 %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) 378 ret <8 x i16> %v 379} 380 381define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { 382; CHECK-LABEL: vp_cttz_v8i16_unmasked: 383; CHECK: # %bb.0: 384; CHECK-NEXT: li a1, 1 385; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 386; CHECK-NEXT: vnot.v v9, v8 387; CHECK-NEXT: vsub.vx v8, v8, a1 388; CHECK-NEXT: lui a0, 5 389; CHECK-NEXT: addi a0, a0, 1365 390; CHECK-NEXT: vand.vv v8, v9, v8 391; CHECK-NEXT: vsrl.vi v9, v8, 1 392; CHECK-NEXT: vand.vx v9, v9, a0 393; CHECK-NEXT: lui a0, 3 394; CHECK-NEXT: addi a0, a0, 819 395; CHECK-NEXT: vsub.vv v8, v8, v9 396; CHECK-NEXT: vand.vx v9, v8, a0 397; CHECK-NEXT: vsrl.vi v8, v8, 2 398; CHECK-NEXT: vand.vx v8, v8, a0 399; CHECK-NEXT: lui a0, 1 400; CHECK-NEXT: addi a0, a0, -241 401; CHECK-NEXT: vadd.vv v8, v9, v8 402; CHECK-NEXT: vsrl.vi v9, v8, 4 403; CHECK-NEXT: vadd.vv v8, v8, v9 404; CHECK-NEXT: vand.vx v8, v8, a0 405; CHECK-NEXT: li a0, 257 406; CHECK-NEXT: vmul.vx v8, v8, a0 407; CHECK-NEXT: vsrl.vi v8, v8, 8 408; CHECK-NEXT: ret 409 %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) 410 ret <8 x i16> %v 411} 412 413declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32) 414 415define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { 416; CHECK-LABEL: vp_cttz_v16i16: 417; CHECK: # %bb.0: 418; CHECK-NEXT: li a1, 1 419; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 420; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 421; CHECK-NEXT: lui a0, 5 422; CHECK-NEXT: vnot.v v8, v8, v0.t 423; CHECK-NEXT: addi a0, a0, 1365 424; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 425; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 426; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 427; CHECK-NEXT: lui a0, 3 428; CHECK-NEXT: addi a0, a0, 819 429; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 430; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 431; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 432; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 433; CHECK-NEXT: lui a0, 1 434; CHECK-NEXT: addi a0, a0, -241 435; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 436; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 437; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 438; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 439; CHECK-NEXT: li a0, 257 440; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 441; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 442; CHECK-NEXT: ret 443 %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) 444 ret <16 x i16> %v 445} 446 447define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { 448; CHECK-LABEL: vp_cttz_v16i16_unmasked: 449; CHECK: # %bb.0: 450; CHECK-NEXT: li a1, 1 451; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 452; CHECK-NEXT: vnot.v v10, v8 453; CHECK-NEXT: vsub.vx v8, v8, a1 454; CHECK-NEXT: lui a0, 5 455; CHECK-NEXT: addi a0, a0, 1365 456; CHECK-NEXT: vand.vv v8, v10, v8 457; CHECK-NEXT: vsrl.vi v10, v8, 1 458; CHECK-NEXT: vand.vx v10, v10, a0 459; CHECK-NEXT: lui a0, 3 460; CHECK-NEXT: addi a0, a0, 819 461; CHECK-NEXT: vsub.vv v8, v8, v10 462; CHECK-NEXT: vand.vx v10, v8, a0 463; CHECK-NEXT: vsrl.vi v8, v8, 2 464; CHECK-NEXT: vand.vx v8, v8, a0 465; CHECK-NEXT: lui a0, 1 466; CHECK-NEXT: addi a0, a0, -241 467; CHECK-NEXT: vadd.vv v8, v10, v8 468; CHECK-NEXT: vsrl.vi v10, v8, 4 469; CHECK-NEXT: vadd.vv v8, v8, v10 470; CHECK-NEXT: vand.vx v8, v8, a0 471; CHECK-NEXT: li a0, 257 472; CHECK-NEXT: vmul.vx v8, v8, a0 473; CHECK-NEXT: vsrl.vi v8, v8, 8 474; CHECK-NEXT: ret 475 %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) 476 ret <16 x i16> %v 477} 478 479declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32) 480 481define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { 482; CHECK-LABEL: vp_cttz_v2i32: 483; CHECK: # %bb.0: 484; CHECK-NEXT: li a1, 1 485; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 486; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 487; CHECK-NEXT: lui a0, 349525 488; CHECK-NEXT: vnot.v v8, v8, v0.t 489; CHECK-NEXT: addi a0, a0, 1365 490; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 491; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 492; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 493; CHECK-NEXT: lui a0, 209715 494; CHECK-NEXT: addi a0, a0, 819 495; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 496; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 497; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 498; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 499; CHECK-NEXT: lui a0, 61681 500; CHECK-NEXT: addi a0, a0, -241 501; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 502; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 503; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 504; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 505; CHECK-NEXT: lui a0, 4112 506; CHECK-NEXT: addi a0, a0, 257 507; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 508; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 509; CHECK-NEXT: ret 510 %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) 511 ret <2 x i32> %v 512} 513 514define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { 515; CHECK-LABEL: vp_cttz_v2i32_unmasked: 516; CHECK: # %bb.0: 517; CHECK-NEXT: li a1, 1 518; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 519; CHECK-NEXT: vnot.v v9, v8 520; CHECK-NEXT: vsub.vx v8, v8, a1 521; CHECK-NEXT: lui a0, 349525 522; CHECK-NEXT: addi a0, a0, 1365 523; CHECK-NEXT: vand.vv v8, v9, v8 524; CHECK-NEXT: vsrl.vi v9, v8, 1 525; CHECK-NEXT: vand.vx v9, v9, a0 526; CHECK-NEXT: lui a0, 209715 527; CHECK-NEXT: addi a0, a0, 819 528; CHECK-NEXT: vsub.vv v8, v8, v9 529; CHECK-NEXT: vand.vx v9, v8, a0 530; CHECK-NEXT: vsrl.vi v8, v8, 2 531; CHECK-NEXT: vand.vx v8, v8, a0 532; CHECK-NEXT: lui a0, 61681 533; CHECK-NEXT: addi a0, a0, -241 534; CHECK-NEXT: vadd.vv v8, v9, v8 535; CHECK-NEXT: vsrl.vi v9, v8, 4 536; CHECK-NEXT: vadd.vv v8, v8, v9 537; CHECK-NEXT: vand.vx v8, v8, a0 538; CHECK-NEXT: lui a0, 4112 539; CHECK-NEXT: addi a0, a0, 257 540; CHECK-NEXT: vmul.vx v8, v8, a0 541; CHECK-NEXT: vsrl.vi v8, v8, 24 542; CHECK-NEXT: ret 543 %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) 544 ret <2 x i32> %v 545} 546 547declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32) 548 549define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { 550; CHECK-LABEL: vp_cttz_v4i32: 551; CHECK: # %bb.0: 552; CHECK-NEXT: li a1, 1 553; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 554; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 555; CHECK-NEXT: lui a0, 349525 556; CHECK-NEXT: vnot.v v8, v8, v0.t 557; CHECK-NEXT: addi a0, a0, 1365 558; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 559; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 560; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 561; CHECK-NEXT: lui a0, 209715 562; CHECK-NEXT: addi a0, a0, 819 563; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 564; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 565; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 566; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 567; CHECK-NEXT: lui a0, 61681 568; CHECK-NEXT: addi a0, a0, -241 569; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 570; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 571; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 572; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 573; CHECK-NEXT: lui a0, 4112 574; CHECK-NEXT: addi a0, a0, 257 575; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 576; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 577; CHECK-NEXT: ret 578 %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) 579 ret <4 x i32> %v 580} 581 582define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { 583; CHECK-LABEL: vp_cttz_v4i32_unmasked: 584; CHECK: # %bb.0: 585; CHECK-NEXT: li a1, 1 586; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 587; CHECK-NEXT: vnot.v v9, v8 588; CHECK-NEXT: vsub.vx v8, v8, a1 589; CHECK-NEXT: lui a0, 349525 590; CHECK-NEXT: addi a0, a0, 1365 591; CHECK-NEXT: vand.vv v8, v9, v8 592; CHECK-NEXT: vsrl.vi v9, v8, 1 593; CHECK-NEXT: vand.vx v9, v9, a0 594; CHECK-NEXT: lui a0, 209715 595; CHECK-NEXT: addi a0, a0, 819 596; CHECK-NEXT: vsub.vv v8, v8, v9 597; CHECK-NEXT: vand.vx v9, v8, a0 598; CHECK-NEXT: vsrl.vi v8, v8, 2 599; CHECK-NEXT: vand.vx v8, v8, a0 600; CHECK-NEXT: lui a0, 61681 601; CHECK-NEXT: addi a0, a0, -241 602; CHECK-NEXT: vadd.vv v8, v9, v8 603; CHECK-NEXT: vsrl.vi v9, v8, 4 604; CHECK-NEXT: vadd.vv v8, v8, v9 605; CHECK-NEXT: vand.vx v8, v8, a0 606; CHECK-NEXT: lui a0, 4112 607; CHECK-NEXT: addi a0, a0, 257 608; CHECK-NEXT: vmul.vx v8, v8, a0 609; CHECK-NEXT: vsrl.vi v8, v8, 24 610; CHECK-NEXT: ret 611 %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) 612 ret <4 x i32> %v 613} 614 615declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32) 616 617define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { 618; CHECK-LABEL: vp_cttz_v8i32: 619; CHECK: # %bb.0: 620; CHECK-NEXT: li a1, 1 621; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 622; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 623; CHECK-NEXT: lui a0, 349525 624; CHECK-NEXT: vnot.v v8, v8, v0.t 625; CHECK-NEXT: addi a0, a0, 1365 626; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 627; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 628; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 629; CHECK-NEXT: lui a0, 209715 630; CHECK-NEXT: addi a0, a0, 819 631; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 632; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 633; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 634; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 635; CHECK-NEXT: lui a0, 61681 636; CHECK-NEXT: addi a0, a0, -241 637; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 638; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 639; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 640; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 641; CHECK-NEXT: lui a0, 4112 642; CHECK-NEXT: addi a0, a0, 257 643; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 644; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 645; CHECK-NEXT: ret 646 %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) 647 ret <8 x i32> %v 648} 649 650define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { 651; CHECK-LABEL: vp_cttz_v8i32_unmasked: 652; CHECK: # %bb.0: 653; CHECK-NEXT: li a1, 1 654; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 655; CHECK-NEXT: vnot.v v10, v8 656; CHECK-NEXT: vsub.vx v8, v8, a1 657; CHECK-NEXT: lui a0, 349525 658; CHECK-NEXT: addi a0, a0, 1365 659; CHECK-NEXT: vand.vv v8, v10, v8 660; CHECK-NEXT: vsrl.vi v10, v8, 1 661; CHECK-NEXT: vand.vx v10, v10, a0 662; CHECK-NEXT: lui a0, 209715 663; CHECK-NEXT: addi a0, a0, 819 664; CHECK-NEXT: vsub.vv v8, v8, v10 665; CHECK-NEXT: vand.vx v10, v8, a0 666; CHECK-NEXT: vsrl.vi v8, v8, 2 667; CHECK-NEXT: vand.vx v8, v8, a0 668; CHECK-NEXT: lui a0, 61681 669; CHECK-NEXT: addi a0, a0, -241 670; CHECK-NEXT: vadd.vv v8, v10, v8 671; CHECK-NEXT: vsrl.vi v10, v8, 4 672; CHECK-NEXT: vadd.vv v8, v8, v10 673; CHECK-NEXT: vand.vx v8, v8, a0 674; CHECK-NEXT: lui a0, 4112 675; CHECK-NEXT: addi a0, a0, 257 676; CHECK-NEXT: vmul.vx v8, v8, a0 677; CHECK-NEXT: vsrl.vi v8, v8, 24 678; CHECK-NEXT: ret 679 %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) 680 ret <8 x i32> %v 681} 682 683declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32) 684 685define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { 686; CHECK-LABEL: vp_cttz_v16i32: 687; CHECK: # %bb.0: 688; CHECK-NEXT: li a1, 1 689; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 690; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 691; CHECK-NEXT: lui a0, 349525 692; CHECK-NEXT: vnot.v v8, v8, v0.t 693; CHECK-NEXT: addi a0, a0, 1365 694; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 695; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 696; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 697; CHECK-NEXT: lui a0, 209715 698; CHECK-NEXT: addi a0, a0, 819 699; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 700; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 701; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 702; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 703; CHECK-NEXT: lui a0, 61681 704; CHECK-NEXT: addi a0, a0, -241 705; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 706; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 707; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 708; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 709; CHECK-NEXT: lui a0, 4112 710; CHECK-NEXT: addi a0, a0, 257 711; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 712; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 713; CHECK-NEXT: ret 714 %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) 715 ret <16 x i32> %v 716} 717 718define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { 719; CHECK-LABEL: vp_cttz_v16i32_unmasked: 720; CHECK: # %bb.0: 721; CHECK-NEXT: li a1, 1 722; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 723; CHECK-NEXT: vnot.v v12, v8 724; CHECK-NEXT: vsub.vx v8, v8, a1 725; CHECK-NEXT: lui a0, 349525 726; CHECK-NEXT: addi a0, a0, 1365 727; CHECK-NEXT: vand.vv v8, v12, v8 728; CHECK-NEXT: vsrl.vi v12, v8, 1 729; CHECK-NEXT: vand.vx v12, v12, a0 730; CHECK-NEXT: lui a0, 209715 731; CHECK-NEXT: addi a0, a0, 819 732; CHECK-NEXT: vsub.vv v8, v8, v12 733; CHECK-NEXT: vand.vx v12, v8, a0 734; CHECK-NEXT: vsrl.vi v8, v8, 2 735; CHECK-NEXT: vand.vx v8, v8, a0 736; CHECK-NEXT: lui a0, 61681 737; CHECK-NEXT: addi a0, a0, -241 738; CHECK-NEXT: vadd.vv v8, v12, v8 739; CHECK-NEXT: vsrl.vi v12, v8, 4 740; CHECK-NEXT: vadd.vv v8, v8, v12 741; CHECK-NEXT: vand.vx v8, v8, a0 742; CHECK-NEXT: lui a0, 4112 743; CHECK-NEXT: addi a0, a0, 257 744; CHECK-NEXT: vmul.vx v8, v8, a0 745; CHECK-NEXT: vsrl.vi v8, v8, 24 746; CHECK-NEXT: ret 747 %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) 748 ret <16 x i32> %v 749} 750 751declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32) 752 753define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { 754; RV32-LABEL: vp_cttz_v2i64: 755; RV32: # %bb.0: 756; RV32-NEXT: li a1, 1 757; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 758; RV32-NEXT: vsub.vx v9, v8, a1, v0.t 759; RV32-NEXT: lui a1, 349525 760; RV32-NEXT: addi a1, a1, 1365 761; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 762; RV32-NEXT: vmv.v.x v10, a1 763; RV32-NEXT: lui a1, 209715 764; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 765; RV32-NEXT: vnot.v v8, v8, v0.t 766; RV32-NEXT: addi a1, a1, 819 767; RV32-NEXT: vand.vv v8, v8, v9, v0.t 768; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 769; RV32-NEXT: vand.vv v9, v9, v10, v0.t 770; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 771; RV32-NEXT: vmv.v.x v10, a1 772; RV32-NEXT: lui a1, 61681 773; RV32-NEXT: addi a1, a1, -241 774; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 775; RV32-NEXT: vsub.vv v8, v8, v9, v0.t 776; RV32-NEXT: vand.vv v9, v8, v10, v0.t 777; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 778; RV32-NEXT: vand.vv v8, v8, v10, v0.t 779; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 780; RV32-NEXT: vmv.v.x v10, a1 781; RV32-NEXT: lui a1, 4112 782; RV32-NEXT: addi a1, a1, 257 783; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 784; RV32-NEXT: vadd.vv v8, v9, v8, v0.t 785; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t 786; RV32-NEXT: vadd.vv v8, v8, v9, v0.t 787; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 788; RV32-NEXT: vmv.v.x v9, a1 789; RV32-NEXT: li a1, 56 790; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 791; RV32-NEXT: vand.vv v8, v8, v10, v0.t 792; RV32-NEXT: vmul.vv v8, v8, v9, v0.t 793; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 794; RV32-NEXT: ret 795; 796; RV64-LABEL: vp_cttz_v2i64: 797; RV64: # %bb.0: 798; RV64-NEXT: li a1, 1 799; RV64-NEXT: lui a2, 349525 800; RV64-NEXT: lui a3, 209715 801; RV64-NEXT: lui a4, 61681 802; RV64-NEXT: lui a5, 4112 803; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 804; RV64-NEXT: vsub.vx v9, v8, a1, v0.t 805; RV64-NEXT: addiw a0, a2, 1365 806; RV64-NEXT: addiw a1, a3, 819 807; RV64-NEXT: addiw a2, a4, -241 808; RV64-NEXT: addiw a3, a5, 257 809; RV64-NEXT: slli a4, a0, 32 810; RV64-NEXT: add a0, a0, a4 811; RV64-NEXT: slli a4, a1, 32 812; RV64-NEXT: add a1, a1, a4 813; RV64-NEXT: slli a4, a2, 32 814; RV64-NEXT: add a2, a2, a4 815; RV64-NEXT: slli a4, a3, 32 816; RV64-NEXT: add a3, a3, a4 817; RV64-NEXT: vnot.v v8, v8, v0.t 818; RV64-NEXT: vand.vv v8, v8, v9, v0.t 819; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 820; RV64-NEXT: vand.vx v9, v9, a0, v0.t 821; RV64-NEXT: vsub.vv v8, v8, v9, v0.t 822; RV64-NEXT: vand.vx v9, v8, a1, v0.t 823; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 824; RV64-NEXT: vand.vx v8, v8, a1, v0.t 825; RV64-NEXT: vadd.vv v8, v9, v8, v0.t 826; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 827; RV64-NEXT: vadd.vv v8, v8, v9, v0.t 828; RV64-NEXT: vand.vx v8, v8, a2, v0.t 829; RV64-NEXT: li a0, 56 830; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 831; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 832; RV64-NEXT: ret 833 %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) 834 ret <2 x i64> %v 835} 836 837define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { 838; RV32-LABEL: vp_cttz_v2i64_unmasked: 839; RV32: # %bb.0: 840; RV32-NEXT: li a1, 1 841; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 842; RV32-NEXT: vnot.v v9, v8 843; RV32-NEXT: vsub.vx v8, v8, a1 844; RV32-NEXT: lui a1, 349525 845; RV32-NEXT: addi a1, a1, 1365 846; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 847; RV32-NEXT: vmv.v.x v10, a1 848; RV32-NEXT: lui a1, 209715 849; RV32-NEXT: addi a1, a1, 819 850; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 851; RV32-NEXT: vand.vv v8, v9, v8 852; RV32-NEXT: vsrl.vi v9, v8, 1 853; RV32-NEXT: vand.vv v9, v9, v10 854; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 855; RV32-NEXT: vmv.v.x v10, a1 856; RV32-NEXT: lui a1, 61681 857; RV32-NEXT: addi a1, a1, -241 858; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 859; RV32-NEXT: vsub.vv v8, v8, v9 860; RV32-NEXT: vand.vv v9, v8, v10 861; RV32-NEXT: vsrl.vi v8, v8, 2 862; RV32-NEXT: vand.vv v8, v8, v10 863; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 864; RV32-NEXT: vmv.v.x v10, a1 865; RV32-NEXT: lui a1, 4112 866; RV32-NEXT: addi a1, a1, 257 867; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 868; RV32-NEXT: vadd.vv v8, v9, v8 869; RV32-NEXT: vsrl.vi v9, v8, 4 870; RV32-NEXT: vadd.vv v8, v8, v9 871; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 872; RV32-NEXT: vmv.v.x v9, a1 873; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 874; RV32-NEXT: vand.vv v8, v8, v10 875; RV32-NEXT: vmul.vv v8, v8, v9 876; RV32-NEXT: li a0, 56 877; RV32-NEXT: vsrl.vx v8, v8, a0 878; RV32-NEXT: ret 879; 880; RV64-LABEL: vp_cttz_v2i64_unmasked: 881; RV64: # %bb.0: 882; RV64-NEXT: li a1, 1 883; RV64-NEXT: lui a2, 349525 884; RV64-NEXT: lui a3, 209715 885; RV64-NEXT: lui a4, 61681 886; RV64-NEXT: lui a5, 4112 887; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 888; RV64-NEXT: vsub.vx v9, v8, a1 889; RV64-NEXT: addiw a0, a2, 1365 890; RV64-NEXT: addiw a1, a3, 819 891; RV64-NEXT: addiw a2, a4, -241 892; RV64-NEXT: addiw a3, a5, 257 893; RV64-NEXT: slli a4, a0, 32 894; RV64-NEXT: add a0, a0, a4 895; RV64-NEXT: slli a4, a1, 32 896; RV64-NEXT: add a1, a1, a4 897; RV64-NEXT: slli a4, a2, 32 898; RV64-NEXT: add a2, a2, a4 899; RV64-NEXT: slli a4, a3, 32 900; RV64-NEXT: add a3, a3, a4 901; RV64-NEXT: vnot.v v8, v8 902; RV64-NEXT: vand.vv v8, v8, v9 903; RV64-NEXT: vsrl.vi v9, v8, 1 904; RV64-NEXT: vand.vx v9, v9, a0 905; RV64-NEXT: vsub.vv v8, v8, v9 906; RV64-NEXT: vand.vx v9, v8, a1 907; RV64-NEXT: vsrl.vi v8, v8, 2 908; RV64-NEXT: vand.vx v8, v8, a1 909; RV64-NEXT: vadd.vv v8, v9, v8 910; RV64-NEXT: vsrl.vi v9, v8, 4 911; RV64-NEXT: vadd.vv v8, v8, v9 912; RV64-NEXT: vand.vx v8, v8, a2 913; RV64-NEXT: vmul.vx v8, v8, a3 914; RV64-NEXT: li a0, 56 915; RV64-NEXT: vsrl.vx v8, v8, a0 916; RV64-NEXT: ret 917 %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) 918 ret <2 x i64> %v 919} 920 921declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32) 922 923define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { 924; RV32-LABEL: vp_cttz_v4i64: 925; RV32: # %bb.0: 926; RV32-NEXT: li a1, 1 927; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 928; RV32-NEXT: vsub.vx v10, v8, a1, v0.t 929; RV32-NEXT: lui a1, 349525 930; RV32-NEXT: addi a1, a1, 1365 931; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 932; RV32-NEXT: vmv.v.x v12, a1 933; RV32-NEXT: lui a1, 209715 934; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 935; RV32-NEXT: vnot.v v8, v8, v0.t 936; RV32-NEXT: addi a1, a1, 819 937; RV32-NEXT: vand.vv v8, v8, v10, v0.t 938; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 939; RV32-NEXT: vand.vv v10, v10, v12, v0.t 940; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 941; RV32-NEXT: vmv.v.x v12, a1 942; RV32-NEXT: lui a1, 61681 943; RV32-NEXT: addi a1, a1, -241 944; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 945; RV32-NEXT: vsub.vv v8, v8, v10, v0.t 946; RV32-NEXT: vand.vv v10, v8, v12, v0.t 947; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 948; RV32-NEXT: vand.vv v8, v8, v12, v0.t 949; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 950; RV32-NEXT: vmv.v.x v12, a1 951; RV32-NEXT: lui a1, 4112 952; RV32-NEXT: addi a1, a1, 257 953; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 954; RV32-NEXT: vadd.vv v8, v10, v8, v0.t 955; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 956; RV32-NEXT: vadd.vv v8, v8, v10, v0.t 957; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 958; RV32-NEXT: vmv.v.x v10, a1 959; RV32-NEXT: li a1, 56 960; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 961; RV32-NEXT: vand.vv v8, v8, v12, v0.t 962; RV32-NEXT: vmul.vv v8, v8, v10, v0.t 963; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 964; RV32-NEXT: ret 965; 966; RV64-LABEL: vp_cttz_v4i64: 967; RV64: # %bb.0: 968; RV64-NEXT: li a1, 1 969; RV64-NEXT: lui a2, 349525 970; RV64-NEXT: lui a3, 209715 971; RV64-NEXT: lui a4, 61681 972; RV64-NEXT: lui a5, 4112 973; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 974; RV64-NEXT: vsub.vx v10, v8, a1, v0.t 975; RV64-NEXT: addiw a0, a2, 1365 976; RV64-NEXT: addiw a1, a3, 819 977; RV64-NEXT: addiw a2, a4, -241 978; RV64-NEXT: addiw a3, a5, 257 979; RV64-NEXT: slli a4, a0, 32 980; RV64-NEXT: add a0, a0, a4 981; RV64-NEXT: slli a4, a1, 32 982; RV64-NEXT: add a1, a1, a4 983; RV64-NEXT: slli a4, a2, 32 984; RV64-NEXT: add a2, a2, a4 985; RV64-NEXT: slli a4, a3, 32 986; RV64-NEXT: add a3, a3, a4 987; RV64-NEXT: vnot.v v8, v8, v0.t 988; RV64-NEXT: vand.vv v8, v8, v10, v0.t 989; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 990; RV64-NEXT: vand.vx v10, v10, a0, v0.t 991; RV64-NEXT: vsub.vv v8, v8, v10, v0.t 992; RV64-NEXT: vand.vx v10, v8, a1, v0.t 993; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 994; RV64-NEXT: vand.vx v8, v8, a1, v0.t 995; RV64-NEXT: vadd.vv v8, v10, v8, v0.t 996; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 997; RV64-NEXT: vadd.vv v8, v8, v10, v0.t 998; RV64-NEXT: vand.vx v8, v8, a2, v0.t 999; RV64-NEXT: li a0, 56 1000; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1001; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1002; RV64-NEXT: ret 1003 %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) 1004 ret <4 x i64> %v 1005} 1006 1007define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { 1008; RV32-LABEL: vp_cttz_v4i64_unmasked: 1009; RV32: # %bb.0: 1010; RV32-NEXT: li a1, 1 1011; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1012; RV32-NEXT: vnot.v v10, v8 1013; RV32-NEXT: vsub.vx v8, v8, a1 1014; RV32-NEXT: lui a1, 349525 1015; RV32-NEXT: addi a1, a1, 1365 1016; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1017; RV32-NEXT: vmv.v.x v12, a1 1018; RV32-NEXT: lui a1, 209715 1019; RV32-NEXT: addi a1, a1, 819 1020; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1021; RV32-NEXT: vand.vv v8, v10, v8 1022; RV32-NEXT: vsrl.vi v10, v8, 1 1023; RV32-NEXT: vand.vv v10, v10, v12 1024; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1025; RV32-NEXT: vmv.v.x v12, a1 1026; RV32-NEXT: lui a1, 61681 1027; RV32-NEXT: addi a1, a1, -241 1028; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1029; RV32-NEXT: vsub.vv v8, v8, v10 1030; RV32-NEXT: vand.vv v10, v8, v12 1031; RV32-NEXT: vsrl.vi v8, v8, 2 1032; RV32-NEXT: vand.vv v8, v8, v12 1033; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1034; RV32-NEXT: vmv.v.x v12, a1 1035; RV32-NEXT: lui a1, 4112 1036; RV32-NEXT: addi a1, a1, 257 1037; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1038; RV32-NEXT: vadd.vv v8, v10, v8 1039; RV32-NEXT: vsrl.vi v10, v8, 4 1040; RV32-NEXT: vadd.vv v8, v8, v10 1041; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1042; RV32-NEXT: vmv.v.x v10, a1 1043; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1044; RV32-NEXT: vand.vv v8, v8, v12 1045; RV32-NEXT: vmul.vv v8, v8, v10 1046; RV32-NEXT: li a0, 56 1047; RV32-NEXT: vsrl.vx v8, v8, a0 1048; RV32-NEXT: ret 1049; 1050; RV64-LABEL: vp_cttz_v4i64_unmasked: 1051; RV64: # %bb.0: 1052; RV64-NEXT: li a1, 1 1053; RV64-NEXT: lui a2, 349525 1054; RV64-NEXT: lui a3, 209715 1055; RV64-NEXT: lui a4, 61681 1056; RV64-NEXT: lui a5, 4112 1057; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1058; RV64-NEXT: vsub.vx v10, v8, a1 1059; RV64-NEXT: addiw a0, a2, 1365 1060; RV64-NEXT: addiw a1, a3, 819 1061; RV64-NEXT: addiw a2, a4, -241 1062; RV64-NEXT: addiw a3, a5, 257 1063; RV64-NEXT: slli a4, a0, 32 1064; RV64-NEXT: add a0, a0, a4 1065; RV64-NEXT: slli a4, a1, 32 1066; RV64-NEXT: add a1, a1, a4 1067; RV64-NEXT: slli a4, a2, 32 1068; RV64-NEXT: add a2, a2, a4 1069; RV64-NEXT: slli a4, a3, 32 1070; RV64-NEXT: add a3, a3, a4 1071; RV64-NEXT: vnot.v v8, v8 1072; RV64-NEXT: vand.vv v8, v8, v10 1073; RV64-NEXT: vsrl.vi v10, v8, 1 1074; RV64-NEXT: vand.vx v10, v10, a0 1075; RV64-NEXT: vsub.vv v8, v8, v10 1076; RV64-NEXT: vand.vx v10, v8, a1 1077; RV64-NEXT: vsrl.vi v8, v8, 2 1078; RV64-NEXT: vand.vx v8, v8, a1 1079; RV64-NEXT: vadd.vv v8, v10, v8 1080; RV64-NEXT: vsrl.vi v10, v8, 4 1081; RV64-NEXT: vadd.vv v8, v8, v10 1082; RV64-NEXT: vand.vx v8, v8, a2 1083; RV64-NEXT: vmul.vx v8, v8, a3 1084; RV64-NEXT: li a0, 56 1085; RV64-NEXT: vsrl.vx v8, v8, a0 1086; RV64-NEXT: ret 1087 %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) 1088 ret <4 x i64> %v 1089} 1090 1091declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32) 1092 1093define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { 1094; RV32-LABEL: vp_cttz_v8i64: 1095; RV32: # %bb.0: 1096; RV32-NEXT: li a1, 1 1097; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1098; RV32-NEXT: vsub.vx v12, v8, a1, v0.t 1099; RV32-NEXT: lui a1, 349525 1100; RV32-NEXT: addi a1, a1, 1365 1101; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1102; RV32-NEXT: vmv.v.x v16, a1 1103; RV32-NEXT: lui a1, 209715 1104; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1105; RV32-NEXT: vnot.v v8, v8, v0.t 1106; RV32-NEXT: addi a1, a1, 819 1107; RV32-NEXT: vand.vv v12, v8, v12, v0.t 1108; RV32-NEXT: vsrl.vi v8, v12, 1, v0.t 1109; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1110; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1111; RV32-NEXT: vmv.v.x v8, a1 1112; RV32-NEXT: lui a1, 61681 1113; RV32-NEXT: addi a1, a1, -241 1114; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1115; RV32-NEXT: vsub.vv v12, v12, v16, v0.t 1116; RV32-NEXT: vand.vv v16, v12, v8, v0.t 1117; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t 1118; RV32-NEXT: vand.vv v8, v12, v8, v0.t 1119; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1120; RV32-NEXT: vmv.v.x v12, a1 1121; RV32-NEXT: lui a1, 4112 1122; RV32-NEXT: addi a1, a1, 257 1123; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1124; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 1125; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 1126; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 1127; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1128; RV32-NEXT: vmv.v.x v16, a1 1129; RV32-NEXT: li a1, 56 1130; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1131; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1132; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1133; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1134; RV32-NEXT: ret 1135; 1136; RV64-LABEL: vp_cttz_v8i64: 1137; RV64: # %bb.0: 1138; RV64-NEXT: li a1, 1 1139; RV64-NEXT: lui a2, 349525 1140; RV64-NEXT: lui a3, 209715 1141; RV64-NEXT: lui a4, 61681 1142; RV64-NEXT: lui a5, 4112 1143; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1144; RV64-NEXT: vsub.vx v12, v8, a1, v0.t 1145; RV64-NEXT: addiw a0, a2, 1365 1146; RV64-NEXT: addiw a1, a3, 819 1147; RV64-NEXT: addiw a2, a4, -241 1148; RV64-NEXT: addiw a3, a5, 257 1149; RV64-NEXT: slli a4, a0, 32 1150; RV64-NEXT: add a0, a0, a4 1151; RV64-NEXT: slli a4, a1, 32 1152; RV64-NEXT: add a1, a1, a4 1153; RV64-NEXT: slli a4, a2, 32 1154; RV64-NEXT: add a2, a2, a4 1155; RV64-NEXT: slli a4, a3, 32 1156; RV64-NEXT: add a3, a3, a4 1157; RV64-NEXT: vnot.v v8, v8, v0.t 1158; RV64-NEXT: vand.vv v8, v8, v12, v0.t 1159; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 1160; RV64-NEXT: vand.vx v12, v12, a0, v0.t 1161; RV64-NEXT: vsub.vv v8, v8, v12, v0.t 1162; RV64-NEXT: vand.vx v12, v8, a1, v0.t 1163; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1164; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1165; RV64-NEXT: vadd.vv v8, v12, v8, v0.t 1166; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 1167; RV64-NEXT: vadd.vv v8, v8, v12, v0.t 1168; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1169; RV64-NEXT: li a0, 56 1170; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1171; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1172; RV64-NEXT: ret 1173 %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) 1174 ret <8 x i64> %v 1175} 1176 1177define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { 1178; RV32-LABEL: vp_cttz_v8i64_unmasked: 1179; RV32: # %bb.0: 1180; RV32-NEXT: li a1, 1 1181; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1182; RV32-NEXT: vnot.v v12, v8 1183; RV32-NEXT: vsub.vx v8, v8, a1 1184; RV32-NEXT: lui a1, 349525 1185; RV32-NEXT: addi a1, a1, 1365 1186; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1187; RV32-NEXT: vmv.v.x v16, a1 1188; RV32-NEXT: lui a1, 209715 1189; RV32-NEXT: addi a1, a1, 819 1190; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1191; RV32-NEXT: vand.vv v8, v12, v8 1192; RV32-NEXT: vsrl.vi v12, v8, 1 1193; RV32-NEXT: vand.vv v12, v12, v16 1194; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1195; RV32-NEXT: vmv.v.x v16, a1 1196; RV32-NEXT: lui a1, 61681 1197; RV32-NEXT: addi a1, a1, -241 1198; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1199; RV32-NEXT: vsub.vv v8, v8, v12 1200; RV32-NEXT: vand.vv v12, v8, v16 1201; RV32-NEXT: vsrl.vi v8, v8, 2 1202; RV32-NEXT: vand.vv v8, v8, v16 1203; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1204; RV32-NEXT: vmv.v.x v16, a1 1205; RV32-NEXT: lui a1, 4112 1206; RV32-NEXT: addi a1, a1, 257 1207; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1208; RV32-NEXT: vadd.vv v8, v12, v8 1209; RV32-NEXT: vsrl.vi v12, v8, 4 1210; RV32-NEXT: vadd.vv v8, v8, v12 1211; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1212; RV32-NEXT: vmv.v.x v12, a1 1213; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1214; RV32-NEXT: vand.vv v8, v8, v16 1215; RV32-NEXT: vmul.vv v8, v8, v12 1216; RV32-NEXT: li a0, 56 1217; RV32-NEXT: vsrl.vx v8, v8, a0 1218; RV32-NEXT: ret 1219; 1220; RV64-LABEL: vp_cttz_v8i64_unmasked: 1221; RV64: # %bb.0: 1222; RV64-NEXT: li a1, 1 1223; RV64-NEXT: lui a2, 349525 1224; RV64-NEXT: lui a3, 209715 1225; RV64-NEXT: lui a4, 61681 1226; RV64-NEXT: lui a5, 4112 1227; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1228; RV64-NEXT: vsub.vx v12, v8, a1 1229; RV64-NEXT: addiw a0, a2, 1365 1230; RV64-NEXT: addiw a1, a3, 819 1231; RV64-NEXT: addiw a2, a4, -241 1232; RV64-NEXT: addiw a3, a5, 257 1233; RV64-NEXT: slli a4, a0, 32 1234; RV64-NEXT: add a0, a0, a4 1235; RV64-NEXT: slli a4, a1, 32 1236; RV64-NEXT: add a1, a1, a4 1237; RV64-NEXT: slli a4, a2, 32 1238; RV64-NEXT: add a2, a2, a4 1239; RV64-NEXT: slli a4, a3, 32 1240; RV64-NEXT: add a3, a3, a4 1241; RV64-NEXT: vnot.v v8, v8 1242; RV64-NEXT: vand.vv v8, v8, v12 1243; RV64-NEXT: vsrl.vi v12, v8, 1 1244; RV64-NEXT: vand.vx v12, v12, a0 1245; RV64-NEXT: vsub.vv v8, v8, v12 1246; RV64-NEXT: vand.vx v12, v8, a1 1247; RV64-NEXT: vsrl.vi v8, v8, 2 1248; RV64-NEXT: vand.vx v8, v8, a1 1249; RV64-NEXT: vadd.vv v8, v12, v8 1250; RV64-NEXT: vsrl.vi v12, v8, 4 1251; RV64-NEXT: vadd.vv v8, v8, v12 1252; RV64-NEXT: vand.vx v8, v8, a2 1253; RV64-NEXT: vmul.vx v8, v8, a3 1254; RV64-NEXT: li a0, 56 1255; RV64-NEXT: vsrl.vx v8, v8, a0 1256; RV64-NEXT: ret 1257 %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) 1258 ret <8 x i64> %v 1259} 1260 1261declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) 1262 1263define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { 1264; RV32-LABEL: vp_cttz_v15i64: 1265; RV32: # %bb.0: 1266; RV32-NEXT: addi sp, sp, -48 1267; RV32-NEXT: .cfi_def_cfa_offset 48 1268; RV32-NEXT: csrr a1, vlenb 1269; RV32-NEXT: slli a1, a1, 4 1270; RV32-NEXT: sub sp, sp, a1 1271; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb 1272; RV32-NEXT: li a1, 1 1273; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1274; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 1275; RV32-NEXT: lui a1, 349525 1276; RV32-NEXT: addi a1, a1, 1365 1277; RV32-NEXT: sw a1, 40(sp) 1278; RV32-NEXT: sw a1, 44(sp) 1279; RV32-NEXT: lui a1, 209715 1280; RV32-NEXT: addi a1, a1, 819 1281; RV32-NEXT: sw a1, 32(sp) 1282; RV32-NEXT: sw a1, 36(sp) 1283; RV32-NEXT: lui a1, 61681 1284; RV32-NEXT: addi a1, a1, -241 1285; RV32-NEXT: sw a1, 24(sp) 1286; RV32-NEXT: sw a1, 28(sp) 1287; RV32-NEXT: lui a1, 4112 1288; RV32-NEXT: addi a1, a1, 257 1289; RV32-NEXT: sw a1, 16(sp) 1290; RV32-NEXT: sw a1, 20(sp) 1291; RV32-NEXT: addi a1, sp, 40 1292; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1293; RV32-NEXT: vlse64.v v24, (a1), zero 1294; RV32-NEXT: csrr a1, vlenb 1295; RV32-NEXT: slli a1, a1, 3 1296; RV32-NEXT: add a1, sp, a1 1297; RV32-NEXT: addi a1, a1, 48 1298; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 1299; RV32-NEXT: addi a1, sp, 32 1300; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1301; RV32-NEXT: vnot.v v8, v8, v0.t 1302; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1303; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 1304; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1305; RV32-NEXT: vlse64.v v24, (a1), zero 1306; RV32-NEXT: addi a1, sp, 48 1307; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 1308; RV32-NEXT: csrr a1, vlenb 1309; RV32-NEXT: slli a1, a1, 3 1310; RV32-NEXT: add a1, sp, a1 1311; RV32-NEXT: addi a1, a1, 48 1312; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1313; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1314; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1315; RV32-NEXT: addi a1, sp, 24 1316; RV32-NEXT: vsub.vv v8, v16, v8, v0.t 1317; RV32-NEXT: addi a2, sp, 48 1318; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload 1319; RV32-NEXT: vand.vv v16, v8, v24, v0.t 1320; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1321; RV32-NEXT: vand.vv v24, v8, v24, v0.t 1322; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1323; RV32-NEXT: vlse64.v v8, (a1), zero 1324; RV32-NEXT: csrr a1, vlenb 1325; RV32-NEXT: slli a1, a1, 3 1326; RV32-NEXT: add a1, sp, a1 1327; RV32-NEXT: addi a1, a1, 48 1328; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1329; RV32-NEXT: addi a1, sp, 16 1330; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1331; RV32-NEXT: vadd.vv v24, v16, v24, v0.t 1332; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1333; RV32-NEXT: vlse64.v v16, (a1), zero 1334; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1335; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 1336; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 1337; RV32-NEXT: li a0, 56 1338; RV32-NEXT: csrr a1, vlenb 1339; RV32-NEXT: slli a1, a1, 3 1340; RV32-NEXT: add a1, sp, a1 1341; RV32-NEXT: addi a1, a1, 48 1342; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1343; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1344; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1345; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 1346; RV32-NEXT: csrr a0, vlenb 1347; RV32-NEXT: slli a0, a0, 4 1348; RV32-NEXT: add sp, sp, a0 1349; RV32-NEXT: .cfi_def_cfa sp, 48 1350; RV32-NEXT: addi sp, sp, 48 1351; RV32-NEXT: .cfi_def_cfa_offset 0 1352; RV32-NEXT: ret 1353; 1354; RV64-LABEL: vp_cttz_v15i64: 1355; RV64: # %bb.0: 1356; RV64-NEXT: li a1, 1 1357; RV64-NEXT: lui a2, 349525 1358; RV64-NEXT: lui a3, 209715 1359; RV64-NEXT: lui a4, 61681 1360; RV64-NEXT: lui a5, 4112 1361; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1362; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 1363; RV64-NEXT: addiw a0, a2, 1365 1364; RV64-NEXT: addiw a1, a3, 819 1365; RV64-NEXT: addiw a2, a4, -241 1366; RV64-NEXT: addiw a3, a5, 257 1367; RV64-NEXT: slli a4, a0, 32 1368; RV64-NEXT: add a0, a0, a4 1369; RV64-NEXT: slli a4, a1, 32 1370; RV64-NEXT: add a1, a1, a4 1371; RV64-NEXT: slli a4, a2, 32 1372; RV64-NEXT: add a2, a2, a4 1373; RV64-NEXT: slli a4, a3, 32 1374; RV64-NEXT: add a3, a3, a4 1375; RV64-NEXT: vnot.v v8, v8, v0.t 1376; RV64-NEXT: vand.vv v8, v8, v16, v0.t 1377; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1378; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1379; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1380; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1381; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1382; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1383; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1384; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1385; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1386; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1387; RV64-NEXT: li a0, 56 1388; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1389; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1390; RV64-NEXT: ret 1391 %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) 1392 ret <15 x i64> %v 1393} 1394 1395define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { 1396; RV32-LABEL: vp_cttz_v15i64_unmasked: 1397; RV32: # %bb.0: 1398; RV32-NEXT: addi sp, sp, -32 1399; RV32-NEXT: .cfi_def_cfa_offset 32 1400; RV32-NEXT: li a1, 1 1401; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1402; RV32-NEXT: vsub.vx v16, v8, a1 1403; RV32-NEXT: lui a1, 349525 1404; RV32-NEXT: addi a1, a1, 1365 1405; RV32-NEXT: sw a1, 24(sp) 1406; RV32-NEXT: sw a1, 28(sp) 1407; RV32-NEXT: lui a1, 209715 1408; RV32-NEXT: addi a1, a1, 819 1409; RV32-NEXT: sw a1, 16(sp) 1410; RV32-NEXT: sw a1, 20(sp) 1411; RV32-NEXT: lui a1, 61681 1412; RV32-NEXT: addi a1, a1, -241 1413; RV32-NEXT: sw a1, 8(sp) 1414; RV32-NEXT: sw a1, 12(sp) 1415; RV32-NEXT: lui a1, 4112 1416; RV32-NEXT: vnot.v v8, v8 1417; RV32-NEXT: addi a1, a1, 257 1418; RV32-NEXT: sw a1, 0(sp) 1419; RV32-NEXT: sw a1, 4(sp) 1420; RV32-NEXT: addi a1, sp, 24 1421; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1422; RV32-NEXT: vlse64.v v0, (a1), zero 1423; RV32-NEXT: addi a1, sp, 16 1424; RV32-NEXT: vlse64.v v24, (a1), zero 1425; RV32-NEXT: addi a1, sp, 8 1426; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1427; RV32-NEXT: vand.vv v16, v8, v16 1428; RV32-NEXT: vsrl.vi v8, v16, 1 1429; RV32-NEXT: vand.vv v0, v8, v0 1430; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1431; RV32-NEXT: vlse64.v v8, (a1), zero 1432; RV32-NEXT: mv a1, sp 1433; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1434; RV32-NEXT: vsub.vv v16, v16, v0 1435; RV32-NEXT: vand.vv v0, v16, v24 1436; RV32-NEXT: vsrl.vi v16, v16, 2 1437; RV32-NEXT: vand.vv v16, v16, v24 1438; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1439; RV32-NEXT: vlse64.v v24, (a1), zero 1440; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1441; RV32-NEXT: vadd.vv v16, v0, v16 1442; RV32-NEXT: vsrl.vi v0, v16, 4 1443; RV32-NEXT: vadd.vv v16, v16, v0 1444; RV32-NEXT: vand.vv v8, v16, v8 1445; RV32-NEXT: vmul.vv v8, v8, v24 1446; RV32-NEXT: li a0, 56 1447; RV32-NEXT: vsrl.vx v8, v8, a0 1448; RV32-NEXT: addi sp, sp, 32 1449; RV32-NEXT: .cfi_def_cfa_offset 0 1450; RV32-NEXT: ret 1451; 1452; RV64-LABEL: vp_cttz_v15i64_unmasked: 1453; RV64: # %bb.0: 1454; RV64-NEXT: li a1, 1 1455; RV64-NEXT: lui a2, 349525 1456; RV64-NEXT: lui a3, 209715 1457; RV64-NEXT: lui a4, 61681 1458; RV64-NEXT: lui a5, 4112 1459; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1460; RV64-NEXT: vsub.vx v16, v8, a1 1461; RV64-NEXT: addiw a0, a2, 1365 1462; RV64-NEXT: addiw a1, a3, 819 1463; RV64-NEXT: addiw a2, a4, -241 1464; RV64-NEXT: addiw a3, a5, 257 1465; RV64-NEXT: slli a4, a0, 32 1466; RV64-NEXT: add a0, a0, a4 1467; RV64-NEXT: slli a4, a1, 32 1468; RV64-NEXT: add a1, a1, a4 1469; RV64-NEXT: slli a4, a2, 32 1470; RV64-NEXT: add a2, a2, a4 1471; RV64-NEXT: slli a4, a3, 32 1472; RV64-NEXT: add a3, a3, a4 1473; RV64-NEXT: vnot.v v8, v8 1474; RV64-NEXT: vand.vv v8, v8, v16 1475; RV64-NEXT: vsrl.vi v16, v8, 1 1476; RV64-NEXT: vand.vx v16, v16, a0 1477; RV64-NEXT: vsub.vv v8, v8, v16 1478; RV64-NEXT: vand.vx v16, v8, a1 1479; RV64-NEXT: vsrl.vi v8, v8, 2 1480; RV64-NEXT: vand.vx v8, v8, a1 1481; RV64-NEXT: vadd.vv v8, v16, v8 1482; RV64-NEXT: vsrl.vi v16, v8, 4 1483; RV64-NEXT: vadd.vv v8, v8, v16 1484; RV64-NEXT: vand.vx v8, v8, a2 1485; RV64-NEXT: vmul.vx v8, v8, a3 1486; RV64-NEXT: li a0, 56 1487; RV64-NEXT: vsrl.vx v8, v8, a0 1488; RV64-NEXT: ret 1489 %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl) 1490 ret <15 x i64> %v 1491} 1492 1493declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) 1494 1495define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { 1496; RV32-LABEL: vp_cttz_v16i64: 1497; RV32: # %bb.0: 1498; RV32-NEXT: addi sp, sp, -48 1499; RV32-NEXT: .cfi_def_cfa_offset 48 1500; RV32-NEXT: csrr a1, vlenb 1501; RV32-NEXT: slli a1, a1, 4 1502; RV32-NEXT: sub sp, sp, a1 1503; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb 1504; RV32-NEXT: li a1, 1 1505; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1506; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 1507; RV32-NEXT: lui a1, 349525 1508; RV32-NEXT: addi a1, a1, 1365 1509; RV32-NEXT: sw a1, 40(sp) 1510; RV32-NEXT: sw a1, 44(sp) 1511; RV32-NEXT: lui a1, 209715 1512; RV32-NEXT: addi a1, a1, 819 1513; RV32-NEXT: sw a1, 32(sp) 1514; RV32-NEXT: sw a1, 36(sp) 1515; RV32-NEXT: lui a1, 61681 1516; RV32-NEXT: addi a1, a1, -241 1517; RV32-NEXT: sw a1, 24(sp) 1518; RV32-NEXT: sw a1, 28(sp) 1519; RV32-NEXT: lui a1, 4112 1520; RV32-NEXT: addi a1, a1, 257 1521; RV32-NEXT: sw a1, 16(sp) 1522; RV32-NEXT: sw a1, 20(sp) 1523; RV32-NEXT: addi a1, sp, 40 1524; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1525; RV32-NEXT: vlse64.v v24, (a1), zero 1526; RV32-NEXT: csrr a1, vlenb 1527; RV32-NEXT: slli a1, a1, 3 1528; RV32-NEXT: add a1, sp, a1 1529; RV32-NEXT: addi a1, a1, 48 1530; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 1531; RV32-NEXT: addi a1, sp, 32 1532; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1533; RV32-NEXT: vnot.v v8, v8, v0.t 1534; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1535; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 1536; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1537; RV32-NEXT: vlse64.v v24, (a1), zero 1538; RV32-NEXT: addi a1, sp, 48 1539; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 1540; RV32-NEXT: csrr a1, vlenb 1541; RV32-NEXT: slli a1, a1, 3 1542; RV32-NEXT: add a1, sp, a1 1543; RV32-NEXT: addi a1, a1, 48 1544; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1545; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1546; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1547; RV32-NEXT: addi a1, sp, 24 1548; RV32-NEXT: vsub.vv v8, v16, v8, v0.t 1549; RV32-NEXT: addi a2, sp, 48 1550; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload 1551; RV32-NEXT: vand.vv v16, v8, v24, v0.t 1552; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1553; RV32-NEXT: vand.vv v24, v8, v24, v0.t 1554; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1555; RV32-NEXT: vlse64.v v8, (a1), zero 1556; RV32-NEXT: csrr a1, vlenb 1557; RV32-NEXT: slli a1, a1, 3 1558; RV32-NEXT: add a1, sp, a1 1559; RV32-NEXT: addi a1, a1, 48 1560; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1561; RV32-NEXT: addi a1, sp, 16 1562; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1563; RV32-NEXT: vadd.vv v24, v16, v24, v0.t 1564; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1565; RV32-NEXT: vlse64.v v16, (a1), zero 1566; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1567; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 1568; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 1569; RV32-NEXT: li a0, 56 1570; RV32-NEXT: csrr a1, vlenb 1571; RV32-NEXT: slli a1, a1, 3 1572; RV32-NEXT: add a1, sp, a1 1573; RV32-NEXT: addi a1, a1, 48 1574; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1575; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1576; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1577; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 1578; RV32-NEXT: csrr a0, vlenb 1579; RV32-NEXT: slli a0, a0, 4 1580; RV32-NEXT: add sp, sp, a0 1581; RV32-NEXT: .cfi_def_cfa sp, 48 1582; RV32-NEXT: addi sp, sp, 48 1583; RV32-NEXT: .cfi_def_cfa_offset 0 1584; RV32-NEXT: ret 1585; 1586; RV64-LABEL: vp_cttz_v16i64: 1587; RV64: # %bb.0: 1588; RV64-NEXT: li a1, 1 1589; RV64-NEXT: lui a2, 349525 1590; RV64-NEXT: lui a3, 209715 1591; RV64-NEXT: lui a4, 61681 1592; RV64-NEXT: lui a5, 4112 1593; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1594; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 1595; RV64-NEXT: addiw a0, a2, 1365 1596; RV64-NEXT: addiw a1, a3, 819 1597; RV64-NEXT: addiw a2, a4, -241 1598; RV64-NEXT: addiw a3, a5, 257 1599; RV64-NEXT: slli a4, a0, 32 1600; RV64-NEXT: add a0, a0, a4 1601; RV64-NEXT: slli a4, a1, 32 1602; RV64-NEXT: add a1, a1, a4 1603; RV64-NEXT: slli a4, a2, 32 1604; RV64-NEXT: add a2, a2, a4 1605; RV64-NEXT: slli a4, a3, 32 1606; RV64-NEXT: add a3, a3, a4 1607; RV64-NEXT: vnot.v v8, v8, v0.t 1608; RV64-NEXT: vand.vv v8, v8, v16, v0.t 1609; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1610; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1611; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1612; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1613; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1614; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1615; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1616; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1617; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1618; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1619; RV64-NEXT: li a0, 56 1620; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1621; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1622; RV64-NEXT: ret 1623 %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) 1624 ret <16 x i64> %v 1625} 1626 1627define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { 1628; RV32-LABEL: vp_cttz_v16i64_unmasked: 1629; RV32: # %bb.0: 1630; RV32-NEXT: addi sp, sp, -32 1631; RV32-NEXT: .cfi_def_cfa_offset 32 1632; RV32-NEXT: li a1, 1 1633; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1634; RV32-NEXT: vsub.vx v16, v8, a1 1635; RV32-NEXT: lui a1, 349525 1636; RV32-NEXT: addi a1, a1, 1365 1637; RV32-NEXT: sw a1, 24(sp) 1638; RV32-NEXT: sw a1, 28(sp) 1639; RV32-NEXT: lui a1, 209715 1640; RV32-NEXT: addi a1, a1, 819 1641; RV32-NEXT: sw a1, 16(sp) 1642; RV32-NEXT: sw a1, 20(sp) 1643; RV32-NEXT: lui a1, 61681 1644; RV32-NEXT: addi a1, a1, -241 1645; RV32-NEXT: sw a1, 8(sp) 1646; RV32-NEXT: sw a1, 12(sp) 1647; RV32-NEXT: lui a1, 4112 1648; RV32-NEXT: vnot.v v8, v8 1649; RV32-NEXT: addi a1, a1, 257 1650; RV32-NEXT: sw a1, 0(sp) 1651; RV32-NEXT: sw a1, 4(sp) 1652; RV32-NEXT: addi a1, sp, 24 1653; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1654; RV32-NEXT: vlse64.v v0, (a1), zero 1655; RV32-NEXT: addi a1, sp, 16 1656; RV32-NEXT: vlse64.v v24, (a1), zero 1657; RV32-NEXT: addi a1, sp, 8 1658; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1659; RV32-NEXT: vand.vv v16, v8, v16 1660; RV32-NEXT: vsrl.vi v8, v16, 1 1661; RV32-NEXT: vand.vv v0, v8, v0 1662; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1663; RV32-NEXT: vlse64.v v8, (a1), zero 1664; RV32-NEXT: mv a1, sp 1665; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1666; RV32-NEXT: vsub.vv v16, v16, v0 1667; RV32-NEXT: vand.vv v0, v16, v24 1668; RV32-NEXT: vsrl.vi v16, v16, 2 1669; RV32-NEXT: vand.vv v16, v16, v24 1670; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1671; RV32-NEXT: vlse64.v v24, (a1), zero 1672; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1673; RV32-NEXT: vadd.vv v16, v0, v16 1674; RV32-NEXT: vsrl.vi v0, v16, 4 1675; RV32-NEXT: vadd.vv v16, v16, v0 1676; RV32-NEXT: vand.vv v8, v16, v8 1677; RV32-NEXT: vmul.vv v8, v8, v24 1678; RV32-NEXT: li a0, 56 1679; RV32-NEXT: vsrl.vx v8, v8, a0 1680; RV32-NEXT: addi sp, sp, 32 1681; RV32-NEXT: .cfi_def_cfa_offset 0 1682; RV32-NEXT: ret 1683; 1684; RV64-LABEL: vp_cttz_v16i64_unmasked: 1685; RV64: # %bb.0: 1686; RV64-NEXT: li a1, 1 1687; RV64-NEXT: lui a2, 349525 1688; RV64-NEXT: lui a3, 209715 1689; RV64-NEXT: lui a4, 61681 1690; RV64-NEXT: lui a5, 4112 1691; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1692; RV64-NEXT: vsub.vx v16, v8, a1 1693; RV64-NEXT: addiw a0, a2, 1365 1694; RV64-NEXT: addiw a1, a3, 819 1695; RV64-NEXT: addiw a2, a4, -241 1696; RV64-NEXT: addiw a3, a5, 257 1697; RV64-NEXT: slli a4, a0, 32 1698; RV64-NEXT: add a0, a0, a4 1699; RV64-NEXT: slli a4, a1, 32 1700; RV64-NEXT: add a1, a1, a4 1701; RV64-NEXT: slli a4, a2, 32 1702; RV64-NEXT: add a2, a2, a4 1703; RV64-NEXT: slli a4, a3, 32 1704; RV64-NEXT: add a3, a3, a4 1705; RV64-NEXT: vnot.v v8, v8 1706; RV64-NEXT: vand.vv v8, v8, v16 1707; RV64-NEXT: vsrl.vi v16, v8, 1 1708; RV64-NEXT: vand.vx v16, v16, a0 1709; RV64-NEXT: vsub.vv v8, v8, v16 1710; RV64-NEXT: vand.vx v16, v8, a1 1711; RV64-NEXT: vsrl.vi v8, v8, 2 1712; RV64-NEXT: vand.vx v8, v8, a1 1713; RV64-NEXT: vadd.vv v8, v16, v8 1714; RV64-NEXT: vsrl.vi v16, v8, 4 1715; RV64-NEXT: vadd.vv v8, v8, v16 1716; RV64-NEXT: vand.vx v8, v8, a2 1717; RV64-NEXT: vmul.vx v8, v8, a3 1718; RV64-NEXT: li a0, 56 1719; RV64-NEXT: vsrl.vx v8, v8, a0 1720; RV64-NEXT: ret 1721 %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) 1722 ret <16 x i64> %v 1723} 1724 1725declare <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) 1726 1727define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { 1728; RV32-LABEL: vp_cttz_v32i64: 1729; RV32: # %bb.0: 1730; RV32-NEXT: addi sp, sp, -48 1731; RV32-NEXT: .cfi_def_cfa_offset 48 1732; RV32-NEXT: csrr a1, vlenb 1733; RV32-NEXT: li a2, 48 1734; RV32-NEXT: mul a1, a1, a2 1735; RV32-NEXT: sub sp, sp, a1 1736; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb 1737; RV32-NEXT: csrr a1, vlenb 1738; RV32-NEXT: slli a1, a1, 4 1739; RV32-NEXT: add a1, sp, a1 1740; RV32-NEXT: addi a1, a1, 48 1741; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1742; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1743; RV32-NEXT: vslidedown.vi v7, v0, 2 1744; RV32-NEXT: lui a1, 349525 1745; RV32-NEXT: lui a2, 209715 1746; RV32-NEXT: addi a1, a1, 1365 1747; RV32-NEXT: sw a1, 40(sp) 1748; RV32-NEXT: sw a1, 44(sp) 1749; RV32-NEXT: lui a1, 61681 1750; RV32-NEXT: addi a2, a2, 819 1751; RV32-NEXT: sw a2, 32(sp) 1752; RV32-NEXT: sw a2, 36(sp) 1753; RV32-NEXT: lui a2, 4112 1754; RV32-NEXT: addi a1, a1, -241 1755; RV32-NEXT: sw a1, 24(sp) 1756; RV32-NEXT: sw a1, 28(sp) 1757; RV32-NEXT: li a1, 16 1758; RV32-NEXT: addi a2, a2, 257 1759; RV32-NEXT: sw a2, 16(sp) 1760; RV32-NEXT: sw a2, 20(sp) 1761; RV32-NEXT: mv a2, a0 1762; RV32-NEXT: bltu a0, a1, .LBB34_2 1763; RV32-NEXT: # %bb.1: 1764; RV32-NEXT: li a2, 16 1765; RV32-NEXT: .LBB34_2: 1766; RV32-NEXT: li a1, 1 1767; RV32-NEXT: addi a3, sp, 40 1768; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 1769; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 1770; RV32-NEXT: vnot.v v8, v8, v0.t 1771; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1772; RV32-NEXT: csrr a4, vlenb 1773; RV32-NEXT: li a5, 24 1774; RV32-NEXT: mul a4, a4, a5 1775; RV32-NEXT: add a4, sp, a4 1776; RV32-NEXT: addi a4, a4, 48 1777; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 1778; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1779; RV32-NEXT: vlse64.v v8, (a3), zero 1780; RV32-NEXT: csrr a3, vlenb 1781; RV32-NEXT: li a4, 40 1782; RV32-NEXT: mul a3, a3, a4 1783; RV32-NEXT: add a3, sp, a3 1784; RV32-NEXT: addi a3, a3, 48 1785; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1786; RV32-NEXT: addi a3, sp, 32 1787; RV32-NEXT: vlse64.v v8, (a3), zero 1788; RV32-NEXT: csrr a3, vlenb 1789; RV32-NEXT: slli a3, a3, 5 1790; RV32-NEXT: add a3, sp, a3 1791; RV32-NEXT: addi a3, a3, 48 1792; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1793; RV32-NEXT: csrr a3, vlenb 1794; RV32-NEXT: li a4, 24 1795; RV32-NEXT: mul a3, a3, a4 1796; RV32-NEXT: add a3, sp, a3 1797; RV32-NEXT: addi a3, a3, 48 1798; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 1799; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 1800; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t 1801; RV32-NEXT: csrr a3, vlenb 1802; RV32-NEXT: li a4, 40 1803; RV32-NEXT: mul a3, a3, a4 1804; RV32-NEXT: add a3, sp, a3 1805; RV32-NEXT: addi a3, a3, 48 1806; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 1807; RV32-NEXT: vand.vv v24, v24, v16, v0.t 1808; RV32-NEXT: csrr a3, vlenb 1809; RV32-NEXT: li a4, 24 1810; RV32-NEXT: mul a3, a3, a4 1811; RV32-NEXT: add a3, sp, a3 1812; RV32-NEXT: addi a3, a3, 48 1813; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 1814; RV32-NEXT: vsub.vv v24, v16, v24, v0.t 1815; RV32-NEXT: vand.vv v16, v24, v8, v0.t 1816; RV32-NEXT: csrr a3, vlenb 1817; RV32-NEXT: li a4, 24 1818; RV32-NEXT: mul a3, a3, a4 1819; RV32-NEXT: add a3, sp, a3 1820; RV32-NEXT: addi a3, a3, 48 1821; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1822; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t 1823; RV32-NEXT: vand.vv v16, v16, v8, v0.t 1824; RV32-NEXT: csrr a3, vlenb 1825; RV32-NEXT: li a4, 24 1826; RV32-NEXT: mul a3, a3, a4 1827; RV32-NEXT: add a3, sp, a3 1828; RV32-NEXT: addi a3, a3, 48 1829; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 1830; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 1831; RV32-NEXT: csrr a3, vlenb 1832; RV32-NEXT: slli a3, a3, 3 1833; RV32-NEXT: add a3, sp, a3 1834; RV32-NEXT: addi a3, a3, 48 1835; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1836; RV32-NEXT: addi a3, sp, 24 1837; RV32-NEXT: addi a4, sp, 16 1838; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1839; RV32-NEXT: vlse64.v v16, (a3), zero 1840; RV32-NEXT: addi a3, sp, 48 1841; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 1842; RV32-NEXT: vlse64.v v8, (a4), zero 1843; RV32-NEXT: csrr a3, vlenb 1844; RV32-NEXT: li a4, 24 1845; RV32-NEXT: mul a3, a3, a4 1846; RV32-NEXT: add a3, sp, a3 1847; RV32-NEXT: addi a3, a3, 48 1848; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1849; RV32-NEXT: csrr a3, vlenb 1850; RV32-NEXT: slli a3, a3, 3 1851; RV32-NEXT: add a3, sp, a3 1852; RV32-NEXT: addi a3, a3, 48 1853; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 1854; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 1855; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t 1856; RV32-NEXT: vadd.vv v8, v8, v24, v0.t 1857; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1858; RV32-NEXT: csrr a2, vlenb 1859; RV32-NEXT: li a3, 24 1860; RV32-NEXT: mul a2, a2, a3 1861; RV32-NEXT: add a2, sp, a2 1862; RV32-NEXT: addi a2, a2, 48 1863; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1864; RV32-NEXT: vmul.vv v8, v16, v8, v0.t 1865; RV32-NEXT: li a2, 56 1866; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t 1867; RV32-NEXT: csrr a3, vlenb 1868; RV32-NEXT: slli a3, a3, 3 1869; RV32-NEXT: add a3, sp, a3 1870; RV32-NEXT: addi a3, a3, 48 1871; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 1872; RV32-NEXT: addi a3, a0, -16 1873; RV32-NEXT: sltu a0, a0, a3 1874; RV32-NEXT: addi a0, a0, -1 1875; RV32-NEXT: and a0, a0, a3 1876; RV32-NEXT: vmv1r.v v0, v7 1877; RV32-NEXT: csrr a3, vlenb 1878; RV32-NEXT: slli a3, a3, 4 1879; RV32-NEXT: add a3, sp, a3 1880; RV32-NEXT: addi a3, a3, 48 1881; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 1882; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1883; RV32-NEXT: vsub.vx v8, v16, a1, v0.t 1884; RV32-NEXT: vnot.v v16, v16, v0.t 1885; RV32-NEXT: vand.vv v8, v16, v8, v0.t 1886; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t 1887; RV32-NEXT: csrr a0, vlenb 1888; RV32-NEXT: li a1, 40 1889; RV32-NEXT: mul a0, a0, a1 1890; RV32-NEXT: add a0, sp, a0 1891; RV32-NEXT: addi a0, a0, 48 1892; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1893; RV32-NEXT: vand.vv v16, v24, v16, v0.t 1894; RV32-NEXT: vsub.vv v24, v8, v16, v0.t 1895; RV32-NEXT: csrr a0, vlenb 1896; RV32-NEXT: slli a0, a0, 5 1897; RV32-NEXT: add a0, sp, a0 1898; RV32-NEXT: addi a0, a0, 48 1899; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 1900; RV32-NEXT: vand.vv v16, v24, v8, v0.t 1901; RV32-NEXT: csrr a0, vlenb 1902; RV32-NEXT: li a1, 40 1903; RV32-NEXT: mul a0, a0, a1 1904; RV32-NEXT: add a0, sp, a0 1905; RV32-NEXT: addi a0, a0, 48 1906; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 1907; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t 1908; RV32-NEXT: csrr a0, vlenb 1909; RV32-NEXT: slli a0, a0, 5 1910; RV32-NEXT: add a0, sp, a0 1911; RV32-NEXT: addi a0, a0, 48 1912; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1913; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1914; RV32-NEXT: csrr a0, vlenb 1915; RV32-NEXT: li a1, 40 1916; RV32-NEXT: mul a0, a0, a1 1917; RV32-NEXT: add a0, sp, a0 1918; RV32-NEXT: addi a0, a0, 48 1919; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1920; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 1921; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 1922; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 1923; RV32-NEXT: addi a0, sp, 48 1924; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1925; RV32-NEXT: vand.vv v8, v8, v16, v0.t 1926; RV32-NEXT: csrr a0, vlenb 1927; RV32-NEXT: li a1, 24 1928; RV32-NEXT: mul a0, a0, a1 1929; RV32-NEXT: add a0, sp, a0 1930; RV32-NEXT: addi a0, a0, 48 1931; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 1932; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1933; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 1934; RV32-NEXT: csrr a0, vlenb 1935; RV32-NEXT: slli a0, a0, 3 1936; RV32-NEXT: add a0, sp, a0 1937; RV32-NEXT: addi a0, a0, 48 1938; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 1939; RV32-NEXT: csrr a0, vlenb 1940; RV32-NEXT: li a1, 48 1941; RV32-NEXT: mul a0, a0, a1 1942; RV32-NEXT: add sp, sp, a0 1943; RV32-NEXT: .cfi_def_cfa sp, 48 1944; RV32-NEXT: addi sp, sp, 48 1945; RV32-NEXT: .cfi_def_cfa_offset 0 1946; RV32-NEXT: ret 1947; 1948; RV64-LABEL: vp_cttz_v32i64: 1949; RV64: # %bb.0: 1950; RV64-NEXT: addi sp, sp, -16 1951; RV64-NEXT: .cfi_def_cfa_offset 16 1952; RV64-NEXT: csrr a1, vlenb 1953; RV64-NEXT: slli a1, a1, 4 1954; RV64-NEXT: sub sp, sp, a1 1955; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 1956; RV64-NEXT: csrr a1, vlenb 1957; RV64-NEXT: slli a1, a1, 3 1958; RV64-NEXT: add a1, sp, a1 1959; RV64-NEXT: addi a1, a1, 16 1960; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 1961; RV64-NEXT: li a1, 16 1962; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 1963; RV64-NEXT: vslidedown.vi v24, v0, 2 1964; RV64-NEXT: mv a4, a0 1965; RV64-NEXT: bltu a0, a1, .LBB34_2 1966; RV64-NEXT: # %bb.1: 1967; RV64-NEXT: li a4, 16 1968; RV64-NEXT: .LBB34_2: 1969; RV64-NEXT: li a1, 1 1970; RV64-NEXT: lui a2, 349525 1971; RV64-NEXT: lui a3, 209715 1972; RV64-NEXT: lui a5, 61681 1973; RV64-NEXT: lui a6, 4112 1974; RV64-NEXT: addiw a2, a2, 1365 1975; RV64-NEXT: addiw a3, a3, 819 1976; RV64-NEXT: addiw a7, a5, -241 1977; RV64-NEXT: addiw t0, a6, 257 1978; RV64-NEXT: slli a6, a2, 32 1979; RV64-NEXT: add a6, a2, a6 1980; RV64-NEXT: slli a5, a3, 32 1981; RV64-NEXT: add a5, a3, a5 1982; RV64-NEXT: slli a2, a7, 32 1983; RV64-NEXT: add a2, a7, a2 1984; RV64-NEXT: slli a3, t0, 32 1985; RV64-NEXT: add a3, t0, a3 1986; RV64-NEXT: addi a7, a0, -16 1987; RV64-NEXT: sltu a0, a0, a7 1988; RV64-NEXT: addi a0, a0, -1 1989; RV64-NEXT: and a7, a0, a7 1990; RV64-NEXT: li a0, 56 1991; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma 1992; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 1993; RV64-NEXT: vnot.v v8, v8, v0.t 1994; RV64-NEXT: vand.vv v8, v8, v16, v0.t 1995; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1996; RV64-NEXT: vand.vx v16, v16, a6, v0.t 1997; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1998; RV64-NEXT: vand.vx v16, v8, a5, v0.t 1999; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2000; RV64-NEXT: vand.vx v8, v8, a5, v0.t 2001; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2002; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2003; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2004; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2005; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 2006; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 2007; RV64-NEXT: addi a4, sp, 16 2008; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 2009; RV64-NEXT: vmv1r.v v0, v24 2010; RV64-NEXT: csrr a4, vlenb 2011; RV64-NEXT: slli a4, a4, 3 2012; RV64-NEXT: add a4, sp, a4 2013; RV64-NEXT: addi a4, a4, 16 2014; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 2015; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma 2016; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 2017; RV64-NEXT: vnot.v v8, v8, v0.t 2018; RV64-NEXT: vand.vv v8, v8, v16, v0.t 2019; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2020; RV64-NEXT: vand.vx v16, v16, a6, v0.t 2021; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 2022; RV64-NEXT: vand.vx v16, v8, a5, v0.t 2023; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2024; RV64-NEXT: vand.vx v8, v8, a5, v0.t 2025; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2026; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2027; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2028; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2029; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 2030; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t 2031; RV64-NEXT: addi a0, sp, 16 2032; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2033; RV64-NEXT: csrr a0, vlenb 2034; RV64-NEXT: slli a0, a0, 4 2035; RV64-NEXT: add sp, sp, a0 2036; RV64-NEXT: .cfi_def_cfa sp, 16 2037; RV64-NEXT: addi sp, sp, 16 2038; RV64-NEXT: .cfi_def_cfa_offset 0 2039; RV64-NEXT: ret 2040 %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) 2041 ret <32 x i64> %v 2042} 2043 2044define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { 2045; RV32-LABEL: vp_cttz_v32i64_unmasked: 2046; RV32: # %bb.0: 2047; RV32-NEXT: addi sp, sp, -48 2048; RV32-NEXT: .cfi_def_cfa_offset 48 2049; RV32-NEXT: csrr a1, vlenb 2050; RV32-NEXT: slli a1, a1, 3 2051; RV32-NEXT: sub sp, sp, a1 2052; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb 2053; RV32-NEXT: lui a1, 349525 2054; RV32-NEXT: lui a2, 209715 2055; RV32-NEXT: addi a1, a1, 1365 2056; RV32-NEXT: sw a1, 40(sp) 2057; RV32-NEXT: sw a1, 44(sp) 2058; RV32-NEXT: lui a1, 61681 2059; RV32-NEXT: addi a2, a2, 819 2060; RV32-NEXT: sw a2, 32(sp) 2061; RV32-NEXT: sw a2, 36(sp) 2062; RV32-NEXT: lui a2, 4112 2063; RV32-NEXT: addi a1, a1, -241 2064; RV32-NEXT: sw a1, 24(sp) 2065; RV32-NEXT: sw a1, 28(sp) 2066; RV32-NEXT: li a3, 16 2067; RV32-NEXT: addi a1, a2, 257 2068; RV32-NEXT: sw a1, 16(sp) 2069; RV32-NEXT: sw a1, 20(sp) 2070; RV32-NEXT: mv a1, a0 2071; RV32-NEXT: bltu a0, a3, .LBB35_2 2072; RV32-NEXT: # %bb.1: 2073; RV32-NEXT: li a1, 16 2074; RV32-NEXT: .LBB35_2: 2075; RV32-NEXT: li a2, 1 2076; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2077; RV32-NEXT: vnot.v v0, v8 2078; RV32-NEXT: addi a3, sp, 40 2079; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2080; RV32-NEXT: vlse64.v v24, (a3), zero 2081; RV32-NEXT: addi a3, a0, -16 2082; RV32-NEXT: sltu a0, a0, a3 2083; RV32-NEXT: addi a0, a0, -1 2084; RV32-NEXT: and a0, a0, a3 2085; RV32-NEXT: addi a3, sp, 32 2086; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2087; RV32-NEXT: vsub.vx v8, v8, a2 2088; RV32-NEXT: vand.vv v8, v0, v8 2089; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2090; RV32-NEXT: vsub.vx v0, v16, a2 2091; RV32-NEXT: vnot.v v16, v16 2092; RV32-NEXT: vand.vv v16, v16, v0 2093; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2094; RV32-NEXT: vsrl.vi v0, v8, 1 2095; RV32-NEXT: vand.vv v0, v0, v24 2096; RV32-NEXT: vsub.vv v0, v8, v0 2097; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2098; RV32-NEXT: vsrl.vi v8, v16, 1 2099; RV32-NEXT: vand.vv v24, v8, v24 2100; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2101; RV32-NEXT: vlse64.v v8, (a3), zero 2102; RV32-NEXT: addi a2, sp, 24 2103; RV32-NEXT: addi a3, sp, 16 2104; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2105; RV32-NEXT: vsub.vv v16, v16, v24 2106; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2107; RV32-NEXT: vand.vv v24, v0, v8 2108; RV32-NEXT: vsrl.vi v0, v0, 2 2109; RV32-NEXT: vand.vv v0, v0, v8 2110; RV32-NEXT: vadd.vv v24, v24, v0 2111; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2112; RV32-NEXT: vand.vv v0, v16, v8 2113; RV32-NEXT: vsrl.vi v16, v16, 2 2114; RV32-NEXT: vand.vv v8, v16, v8 2115; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2116; RV32-NEXT: vsrl.vi v16, v24, 4 2117; RV32-NEXT: vadd.vv v16, v24, v16 2118; RV32-NEXT: addi a4, sp, 48 2119; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 2120; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2121; RV32-NEXT: vlse64.v v24, (a2), zero 2122; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2123; RV32-NEXT: vadd.vv v8, v0, v8 2124; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 2125; RV32-NEXT: vlse64.v v0, (a3), zero 2126; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2127; RV32-NEXT: vsrl.vi v16, v8, 4 2128; RV32-NEXT: vadd.vv v8, v8, v16 2129; RV32-NEXT: addi a2, sp, 48 2130; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload 2131; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2132; RV32-NEXT: vand.vv v16, v16, v24 2133; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2134; RV32-NEXT: vand.vv v8, v8, v24 2135; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2136; RV32-NEXT: vmul.vv v16, v16, v0 2137; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2138; RV32-NEXT: vmul.vv v24, v8, v0 2139; RV32-NEXT: li a2, 56 2140; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2141; RV32-NEXT: vsrl.vx v8, v16, a2 2142; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2143; RV32-NEXT: vsrl.vx v16, v24, a2 2144; RV32-NEXT: csrr a0, vlenb 2145; RV32-NEXT: slli a0, a0, 3 2146; RV32-NEXT: add sp, sp, a0 2147; RV32-NEXT: .cfi_def_cfa sp, 48 2148; RV32-NEXT: addi sp, sp, 48 2149; RV32-NEXT: .cfi_def_cfa_offset 0 2150; RV32-NEXT: ret 2151; 2152; RV64-LABEL: vp_cttz_v32i64_unmasked: 2153; RV64: # %bb.0: 2154; RV64-NEXT: li a2, 16 2155; RV64-NEXT: mv a1, a0 2156; RV64-NEXT: bltu a0, a2, .LBB35_2 2157; RV64-NEXT: # %bb.1: 2158; RV64-NEXT: li a1, 16 2159; RV64-NEXT: .LBB35_2: 2160; RV64-NEXT: li a2, 1 2161; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2162; RV64-NEXT: vnot.v v24, v8 2163; RV64-NEXT: lui a3, 349525 2164; RV64-NEXT: lui a4, 209715 2165; RV64-NEXT: lui a5, 61681 2166; RV64-NEXT: lui a6, 4112 2167; RV64-NEXT: addiw a3, a3, 1365 2168; RV64-NEXT: addiw a4, a4, 819 2169; RV64-NEXT: addiw a5, a5, -241 2170; RV64-NEXT: addiw a6, a6, 257 2171; RV64-NEXT: slli a7, a3, 32 2172; RV64-NEXT: add a3, a3, a7 2173; RV64-NEXT: slli a7, a4, 32 2174; RV64-NEXT: add a4, a4, a7 2175; RV64-NEXT: slli a7, a5, 32 2176; RV64-NEXT: add a5, a5, a7 2177; RV64-NEXT: slli a7, a6, 32 2178; RV64-NEXT: add a6, a6, a7 2179; RV64-NEXT: addi a7, a0, -16 2180; RV64-NEXT: sltu a0, a0, a7 2181; RV64-NEXT: addi a0, a0, -1 2182; RV64-NEXT: and a0, a0, a7 2183; RV64-NEXT: li a7, 56 2184; RV64-NEXT: vsub.vx v8, v8, a2 2185; RV64-NEXT: vand.vv v8, v24, v8 2186; RV64-NEXT: vsrl.vi v24, v8, 1 2187; RV64-NEXT: vand.vx v24, v24, a3 2188; RV64-NEXT: vsub.vv v8, v8, v24 2189; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2190; RV64-NEXT: vsub.vx v24, v16, a2 2191; RV64-NEXT: vnot.v v16, v16 2192; RV64-NEXT: vand.vv v16, v16, v24 2193; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2194; RV64-NEXT: vand.vx v24, v8, a4 2195; RV64-NEXT: vsrl.vi v8, v8, 2 2196; RV64-NEXT: vand.vx v8, v8, a4 2197; RV64-NEXT: vadd.vv v8, v24, v8 2198; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2199; RV64-NEXT: vsrl.vi v24, v16, 1 2200; RV64-NEXT: vand.vx v24, v24, a3 2201; RV64-NEXT: vsub.vv v16, v16, v24 2202; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2203; RV64-NEXT: vsrl.vi v24, v8, 4 2204; RV64-NEXT: vadd.vv v8, v8, v24 2205; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2206; RV64-NEXT: vand.vx v24, v16, a4 2207; RV64-NEXT: vsrl.vi v16, v16, 2 2208; RV64-NEXT: vand.vx v16, v16, a4 2209; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2210; RV64-NEXT: vand.vx v8, v8, a5 2211; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2212; RV64-NEXT: vadd.vv v16, v24, v16 2213; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2214; RV64-NEXT: vmul.vx v8, v8, a6 2215; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2216; RV64-NEXT: vsrl.vi v24, v16, 4 2217; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2218; RV64-NEXT: vsrl.vx v8, v8, a7 2219; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2220; RV64-NEXT: vadd.vv v16, v16, v24 2221; RV64-NEXT: vand.vx v16, v16, a5 2222; RV64-NEXT: vmul.vx v16, v16, a6 2223; RV64-NEXT: vsrl.vx v16, v16, a7 2224; RV64-NEXT: ret 2225 %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl) 2226 ret <32 x i64> %v 2227} 2228 2229define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { 2230; CHECK-LABEL: vp_cttz_zero_undef_v2i8: 2231; CHECK: # %bb.0: 2232; CHECK-NEXT: li a1, 1 2233; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2234; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2235; CHECK-NEXT: li a0, 85 2236; CHECK-NEXT: vnot.v v8, v8, v0.t 2237; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2238; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2239; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2240; CHECK-NEXT: li a0, 51 2241; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2242; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2243; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2244; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2245; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2246; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2247; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2248; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 2249; CHECK-NEXT: ret 2250 %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl) 2251 ret <2 x i8> %v 2252} 2253 2254define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { 2255; CHECK-LABEL: vp_cttz_zero_undef_v2i8_unmasked: 2256; CHECK: # %bb.0: 2257; CHECK-NEXT: li a1, 1 2258; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 2259; CHECK-NEXT: vnot.v v9, v8 2260; CHECK-NEXT: vsub.vx v8, v8, a1 2261; CHECK-NEXT: li a0, 85 2262; CHECK-NEXT: vand.vv v8, v9, v8 2263; CHECK-NEXT: vsrl.vi v9, v8, 1 2264; CHECK-NEXT: vand.vx v9, v9, a0 2265; CHECK-NEXT: li a0, 51 2266; CHECK-NEXT: vsub.vv v8, v8, v9 2267; CHECK-NEXT: vand.vx v9, v8, a0 2268; CHECK-NEXT: vsrl.vi v8, v8, 2 2269; CHECK-NEXT: vand.vx v8, v8, a0 2270; CHECK-NEXT: vadd.vv v8, v9, v8 2271; CHECK-NEXT: vsrl.vi v9, v8, 4 2272; CHECK-NEXT: vadd.vv v8, v8, v9 2273; CHECK-NEXT: vand.vi v8, v8, 15 2274; CHECK-NEXT: ret 2275 %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl) 2276 ret <2 x i8> %v 2277} 2278 2279define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { 2280; CHECK-LABEL: vp_cttz_zero_undef_v4i8: 2281; CHECK: # %bb.0: 2282; CHECK-NEXT: li a1, 1 2283; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2284; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2285; CHECK-NEXT: li a0, 85 2286; CHECK-NEXT: vnot.v v8, v8, v0.t 2287; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2288; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2289; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2290; CHECK-NEXT: li a0, 51 2291; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2292; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2293; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2294; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2295; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2296; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2297; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2298; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 2299; CHECK-NEXT: ret 2300 %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl) 2301 ret <4 x i8> %v 2302} 2303 2304define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { 2305; CHECK-LABEL: vp_cttz_zero_undef_v4i8_unmasked: 2306; CHECK: # %bb.0: 2307; CHECK-NEXT: li a1, 1 2308; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 2309; CHECK-NEXT: vnot.v v9, v8 2310; CHECK-NEXT: vsub.vx v8, v8, a1 2311; CHECK-NEXT: li a0, 85 2312; CHECK-NEXT: vand.vv v8, v9, v8 2313; CHECK-NEXT: vsrl.vi v9, v8, 1 2314; CHECK-NEXT: vand.vx v9, v9, a0 2315; CHECK-NEXT: li a0, 51 2316; CHECK-NEXT: vsub.vv v8, v8, v9 2317; CHECK-NEXT: vand.vx v9, v8, a0 2318; CHECK-NEXT: vsrl.vi v8, v8, 2 2319; CHECK-NEXT: vand.vx v8, v8, a0 2320; CHECK-NEXT: vadd.vv v8, v9, v8 2321; CHECK-NEXT: vsrl.vi v9, v8, 4 2322; CHECK-NEXT: vadd.vv v8, v8, v9 2323; CHECK-NEXT: vand.vi v8, v8, 15 2324; CHECK-NEXT: ret 2325 %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl) 2326 ret <4 x i8> %v 2327} 2328 2329define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { 2330; CHECK-LABEL: vp_cttz_zero_undef_v8i8: 2331; CHECK: # %bb.0: 2332; CHECK-NEXT: li a1, 1 2333; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2334; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2335; CHECK-NEXT: li a0, 85 2336; CHECK-NEXT: vnot.v v8, v8, v0.t 2337; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2338; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2339; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2340; CHECK-NEXT: li a0, 51 2341; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2342; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2343; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2344; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2345; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2346; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2347; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2348; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 2349; CHECK-NEXT: ret 2350 %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl) 2351 ret <8 x i8> %v 2352} 2353 2354define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { 2355; CHECK-LABEL: vp_cttz_zero_undef_v8i8_unmasked: 2356; CHECK: # %bb.0: 2357; CHECK-NEXT: li a1, 1 2358; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 2359; CHECK-NEXT: vnot.v v9, v8 2360; CHECK-NEXT: vsub.vx v8, v8, a1 2361; CHECK-NEXT: li a0, 85 2362; CHECK-NEXT: vand.vv v8, v9, v8 2363; CHECK-NEXT: vsrl.vi v9, v8, 1 2364; CHECK-NEXT: vand.vx v9, v9, a0 2365; CHECK-NEXT: li a0, 51 2366; CHECK-NEXT: vsub.vv v8, v8, v9 2367; CHECK-NEXT: vand.vx v9, v8, a0 2368; CHECK-NEXT: vsrl.vi v8, v8, 2 2369; CHECK-NEXT: vand.vx v8, v8, a0 2370; CHECK-NEXT: vadd.vv v8, v9, v8 2371; CHECK-NEXT: vsrl.vi v9, v8, 4 2372; CHECK-NEXT: vadd.vv v8, v8, v9 2373; CHECK-NEXT: vand.vi v8, v8, 15 2374; CHECK-NEXT: ret 2375 %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl) 2376 ret <8 x i8> %v 2377} 2378 2379define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { 2380; CHECK-LABEL: vp_cttz_zero_undef_v16i8: 2381; CHECK: # %bb.0: 2382; CHECK-NEXT: li a1, 1 2383; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 2384; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2385; CHECK-NEXT: li a0, 85 2386; CHECK-NEXT: vnot.v v8, v8, v0.t 2387; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2388; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2389; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2390; CHECK-NEXT: li a0, 51 2391; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2392; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2393; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2394; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2395; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2396; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2397; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2398; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 2399; CHECK-NEXT: ret 2400 %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl) 2401 ret <16 x i8> %v 2402} 2403 2404define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { 2405; CHECK-LABEL: vp_cttz_zero_undef_v16i8_unmasked: 2406; CHECK: # %bb.0: 2407; CHECK-NEXT: li a1, 1 2408; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 2409; CHECK-NEXT: vnot.v v9, v8 2410; CHECK-NEXT: vsub.vx v8, v8, a1 2411; CHECK-NEXT: li a0, 85 2412; CHECK-NEXT: vand.vv v8, v9, v8 2413; CHECK-NEXT: vsrl.vi v9, v8, 1 2414; CHECK-NEXT: vand.vx v9, v9, a0 2415; CHECK-NEXT: li a0, 51 2416; CHECK-NEXT: vsub.vv v8, v8, v9 2417; CHECK-NEXT: vand.vx v9, v8, a0 2418; CHECK-NEXT: vsrl.vi v8, v8, 2 2419; CHECK-NEXT: vand.vx v8, v8, a0 2420; CHECK-NEXT: vadd.vv v8, v9, v8 2421; CHECK-NEXT: vsrl.vi v9, v8, 4 2422; CHECK-NEXT: vadd.vv v8, v8, v9 2423; CHECK-NEXT: vand.vi v8, v8, 15 2424; CHECK-NEXT: ret 2425 %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl) 2426 ret <16 x i8> %v 2427} 2428 2429define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { 2430; CHECK-LABEL: vp_cttz_zero_undef_v2i16: 2431; CHECK: # %bb.0: 2432; CHECK-NEXT: li a1, 1 2433; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 2434; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2435; CHECK-NEXT: lui a0, 5 2436; CHECK-NEXT: vnot.v v8, v8, v0.t 2437; CHECK-NEXT: addi a0, a0, 1365 2438; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2439; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2440; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2441; CHECK-NEXT: lui a0, 3 2442; CHECK-NEXT: addi a0, a0, 819 2443; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2444; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2445; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2446; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2447; CHECK-NEXT: lui a0, 1 2448; CHECK-NEXT: addi a0, a0, -241 2449; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2450; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2451; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2452; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2453; CHECK-NEXT: li a0, 257 2454; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2455; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 2456; CHECK-NEXT: ret 2457 %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl) 2458 ret <2 x i16> %v 2459} 2460 2461define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { 2462; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked: 2463; CHECK: # %bb.0: 2464; CHECK-NEXT: li a1, 1 2465; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 2466; CHECK-NEXT: vnot.v v9, v8 2467; CHECK-NEXT: vsub.vx v8, v8, a1 2468; CHECK-NEXT: lui a0, 5 2469; CHECK-NEXT: addi a0, a0, 1365 2470; CHECK-NEXT: vand.vv v8, v9, v8 2471; CHECK-NEXT: vsrl.vi v9, v8, 1 2472; CHECK-NEXT: vand.vx v9, v9, a0 2473; CHECK-NEXT: lui a0, 3 2474; CHECK-NEXT: addi a0, a0, 819 2475; CHECK-NEXT: vsub.vv v8, v8, v9 2476; CHECK-NEXT: vand.vx v9, v8, a0 2477; CHECK-NEXT: vsrl.vi v8, v8, 2 2478; CHECK-NEXT: vand.vx v8, v8, a0 2479; CHECK-NEXT: lui a0, 1 2480; CHECK-NEXT: addi a0, a0, -241 2481; CHECK-NEXT: vadd.vv v8, v9, v8 2482; CHECK-NEXT: vsrl.vi v9, v8, 4 2483; CHECK-NEXT: vadd.vv v8, v8, v9 2484; CHECK-NEXT: vand.vx v8, v8, a0 2485; CHECK-NEXT: li a0, 257 2486; CHECK-NEXT: vmul.vx v8, v8, a0 2487; CHECK-NEXT: vsrl.vi v8, v8, 8 2488; CHECK-NEXT: ret 2489 %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl) 2490 ret <2 x i16> %v 2491} 2492 2493define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { 2494; CHECK-LABEL: vp_cttz_zero_undef_v4i16: 2495; CHECK: # %bb.0: 2496; CHECK-NEXT: li a1, 1 2497; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 2498; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2499; CHECK-NEXT: lui a0, 5 2500; CHECK-NEXT: vnot.v v8, v8, v0.t 2501; CHECK-NEXT: addi a0, a0, 1365 2502; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2503; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2504; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2505; CHECK-NEXT: lui a0, 3 2506; CHECK-NEXT: addi a0, a0, 819 2507; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2508; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2509; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2510; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2511; CHECK-NEXT: lui a0, 1 2512; CHECK-NEXT: addi a0, a0, -241 2513; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2514; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2515; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2516; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2517; CHECK-NEXT: li a0, 257 2518; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2519; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 2520; CHECK-NEXT: ret 2521 %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) 2522 ret <4 x i16> %v 2523} 2524 2525define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { 2526; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked: 2527; CHECK: # %bb.0: 2528; CHECK-NEXT: li a1, 1 2529; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 2530; CHECK-NEXT: vnot.v v9, v8 2531; CHECK-NEXT: vsub.vx v8, v8, a1 2532; CHECK-NEXT: lui a0, 5 2533; CHECK-NEXT: addi a0, a0, 1365 2534; CHECK-NEXT: vand.vv v8, v9, v8 2535; CHECK-NEXT: vsrl.vi v9, v8, 1 2536; CHECK-NEXT: vand.vx v9, v9, a0 2537; CHECK-NEXT: lui a0, 3 2538; CHECK-NEXT: addi a0, a0, 819 2539; CHECK-NEXT: vsub.vv v8, v8, v9 2540; CHECK-NEXT: vand.vx v9, v8, a0 2541; CHECK-NEXT: vsrl.vi v8, v8, 2 2542; CHECK-NEXT: vand.vx v8, v8, a0 2543; CHECK-NEXT: lui a0, 1 2544; CHECK-NEXT: addi a0, a0, -241 2545; CHECK-NEXT: vadd.vv v8, v9, v8 2546; CHECK-NEXT: vsrl.vi v9, v8, 4 2547; CHECK-NEXT: vadd.vv v8, v8, v9 2548; CHECK-NEXT: vand.vx v8, v8, a0 2549; CHECK-NEXT: li a0, 257 2550; CHECK-NEXT: vmul.vx v8, v8, a0 2551; CHECK-NEXT: vsrl.vi v8, v8, 8 2552; CHECK-NEXT: ret 2553 %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl) 2554 ret <4 x i16> %v 2555} 2556 2557define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { 2558; CHECK-LABEL: vp_cttz_zero_undef_v8i16: 2559; CHECK: # %bb.0: 2560; CHECK-NEXT: li a1, 1 2561; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 2562; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2563; CHECK-NEXT: lui a0, 5 2564; CHECK-NEXT: vnot.v v8, v8, v0.t 2565; CHECK-NEXT: addi a0, a0, 1365 2566; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2567; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2568; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2569; CHECK-NEXT: lui a0, 3 2570; CHECK-NEXT: addi a0, a0, 819 2571; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2572; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2573; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2574; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2575; CHECK-NEXT: lui a0, 1 2576; CHECK-NEXT: addi a0, a0, -241 2577; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2578; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2579; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2580; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2581; CHECK-NEXT: li a0, 257 2582; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2583; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 2584; CHECK-NEXT: ret 2585 %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) 2586 ret <8 x i16> %v 2587} 2588 2589define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { 2590; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked: 2591; CHECK: # %bb.0: 2592; CHECK-NEXT: li a1, 1 2593; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 2594; CHECK-NEXT: vnot.v v9, v8 2595; CHECK-NEXT: vsub.vx v8, v8, a1 2596; CHECK-NEXT: lui a0, 5 2597; CHECK-NEXT: addi a0, a0, 1365 2598; CHECK-NEXT: vand.vv v8, v9, v8 2599; CHECK-NEXT: vsrl.vi v9, v8, 1 2600; CHECK-NEXT: vand.vx v9, v9, a0 2601; CHECK-NEXT: lui a0, 3 2602; CHECK-NEXT: addi a0, a0, 819 2603; CHECK-NEXT: vsub.vv v8, v8, v9 2604; CHECK-NEXT: vand.vx v9, v8, a0 2605; CHECK-NEXT: vsrl.vi v8, v8, 2 2606; CHECK-NEXT: vand.vx v8, v8, a0 2607; CHECK-NEXT: lui a0, 1 2608; CHECK-NEXT: addi a0, a0, -241 2609; CHECK-NEXT: vadd.vv v8, v9, v8 2610; CHECK-NEXT: vsrl.vi v9, v8, 4 2611; CHECK-NEXT: vadd.vv v8, v8, v9 2612; CHECK-NEXT: vand.vx v8, v8, a0 2613; CHECK-NEXT: li a0, 257 2614; CHECK-NEXT: vmul.vx v8, v8, a0 2615; CHECK-NEXT: vsrl.vi v8, v8, 8 2616; CHECK-NEXT: ret 2617 %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl) 2618 ret <8 x i16> %v 2619} 2620 2621define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { 2622; CHECK-LABEL: vp_cttz_zero_undef_v16i16: 2623; CHECK: # %bb.0: 2624; CHECK-NEXT: li a1, 1 2625; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 2626; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 2627; CHECK-NEXT: lui a0, 5 2628; CHECK-NEXT: vnot.v v8, v8, v0.t 2629; CHECK-NEXT: addi a0, a0, 1365 2630; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 2631; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 2632; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 2633; CHECK-NEXT: lui a0, 3 2634; CHECK-NEXT: addi a0, a0, 819 2635; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 2636; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 2637; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2638; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2639; CHECK-NEXT: lui a0, 1 2640; CHECK-NEXT: addi a0, a0, -241 2641; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 2642; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 2643; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 2644; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2645; CHECK-NEXT: li a0, 257 2646; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2647; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 2648; CHECK-NEXT: ret 2649 %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl) 2650 ret <16 x i16> %v 2651} 2652 2653define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { 2654; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked: 2655; CHECK: # %bb.0: 2656; CHECK-NEXT: li a1, 1 2657; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 2658; CHECK-NEXT: vnot.v v10, v8 2659; CHECK-NEXT: vsub.vx v8, v8, a1 2660; CHECK-NEXT: lui a0, 5 2661; CHECK-NEXT: addi a0, a0, 1365 2662; CHECK-NEXT: vand.vv v8, v10, v8 2663; CHECK-NEXT: vsrl.vi v10, v8, 1 2664; CHECK-NEXT: vand.vx v10, v10, a0 2665; CHECK-NEXT: lui a0, 3 2666; CHECK-NEXT: addi a0, a0, 819 2667; CHECK-NEXT: vsub.vv v8, v8, v10 2668; CHECK-NEXT: vand.vx v10, v8, a0 2669; CHECK-NEXT: vsrl.vi v8, v8, 2 2670; CHECK-NEXT: vand.vx v8, v8, a0 2671; CHECK-NEXT: lui a0, 1 2672; CHECK-NEXT: addi a0, a0, -241 2673; CHECK-NEXT: vadd.vv v8, v10, v8 2674; CHECK-NEXT: vsrl.vi v10, v8, 4 2675; CHECK-NEXT: vadd.vv v8, v8, v10 2676; CHECK-NEXT: vand.vx v8, v8, a0 2677; CHECK-NEXT: li a0, 257 2678; CHECK-NEXT: vmul.vx v8, v8, a0 2679; CHECK-NEXT: vsrl.vi v8, v8, 8 2680; CHECK-NEXT: ret 2681 %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl) 2682 ret <16 x i16> %v 2683} 2684 2685define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { 2686; CHECK-LABEL: vp_cttz_zero_undef_v2i32: 2687; CHECK: # %bb.0: 2688; CHECK-NEXT: li a1, 1 2689; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 2690; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2691; CHECK-NEXT: lui a0, 349525 2692; CHECK-NEXT: vnot.v v8, v8, v0.t 2693; CHECK-NEXT: addi a0, a0, 1365 2694; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2695; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2696; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2697; CHECK-NEXT: lui a0, 209715 2698; CHECK-NEXT: addi a0, a0, 819 2699; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2700; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2701; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2702; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2703; CHECK-NEXT: lui a0, 61681 2704; CHECK-NEXT: addi a0, a0, -241 2705; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2706; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2707; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2708; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2709; CHECK-NEXT: lui a0, 4112 2710; CHECK-NEXT: addi a0, a0, 257 2711; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2712; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 2713; CHECK-NEXT: ret 2714 %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl) 2715 ret <2 x i32> %v 2716} 2717 2718define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { 2719; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked: 2720; CHECK: # %bb.0: 2721; CHECK-NEXT: li a1, 1 2722; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 2723; CHECK-NEXT: vnot.v v9, v8 2724; CHECK-NEXT: vsub.vx v8, v8, a1 2725; CHECK-NEXT: lui a0, 349525 2726; CHECK-NEXT: addi a0, a0, 1365 2727; CHECK-NEXT: vand.vv v8, v9, v8 2728; CHECK-NEXT: vsrl.vi v9, v8, 1 2729; CHECK-NEXT: vand.vx v9, v9, a0 2730; CHECK-NEXT: lui a0, 209715 2731; CHECK-NEXT: addi a0, a0, 819 2732; CHECK-NEXT: vsub.vv v8, v8, v9 2733; CHECK-NEXT: vand.vx v9, v8, a0 2734; CHECK-NEXT: vsrl.vi v8, v8, 2 2735; CHECK-NEXT: vand.vx v8, v8, a0 2736; CHECK-NEXT: lui a0, 61681 2737; CHECK-NEXT: addi a0, a0, -241 2738; CHECK-NEXT: vadd.vv v8, v9, v8 2739; CHECK-NEXT: vsrl.vi v9, v8, 4 2740; CHECK-NEXT: vadd.vv v8, v8, v9 2741; CHECK-NEXT: vand.vx v8, v8, a0 2742; CHECK-NEXT: lui a0, 4112 2743; CHECK-NEXT: addi a0, a0, 257 2744; CHECK-NEXT: vmul.vx v8, v8, a0 2745; CHECK-NEXT: vsrl.vi v8, v8, 24 2746; CHECK-NEXT: ret 2747 %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl) 2748 ret <2 x i32> %v 2749} 2750 2751define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { 2752; CHECK-LABEL: vp_cttz_zero_undef_v4i32: 2753; CHECK: # %bb.0: 2754; CHECK-NEXT: li a1, 1 2755; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 2756; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t 2757; CHECK-NEXT: lui a0, 349525 2758; CHECK-NEXT: vnot.v v8, v8, v0.t 2759; CHECK-NEXT: addi a0, a0, 1365 2760; CHECK-NEXT: vand.vv v8, v8, v9, v0.t 2761; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2762; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2763; CHECK-NEXT: lui a0, 209715 2764; CHECK-NEXT: addi a0, a0, 819 2765; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2766; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2767; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2768; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2769; CHECK-NEXT: lui a0, 61681 2770; CHECK-NEXT: addi a0, a0, -241 2771; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2772; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2773; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2774; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2775; CHECK-NEXT: lui a0, 4112 2776; CHECK-NEXT: addi a0, a0, 257 2777; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2778; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 2779; CHECK-NEXT: ret 2780 %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl) 2781 ret <4 x i32> %v 2782} 2783 2784define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { 2785; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked: 2786; CHECK: # %bb.0: 2787; CHECK-NEXT: li a1, 1 2788; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 2789; CHECK-NEXT: vnot.v v9, v8 2790; CHECK-NEXT: vsub.vx v8, v8, a1 2791; CHECK-NEXT: lui a0, 349525 2792; CHECK-NEXT: addi a0, a0, 1365 2793; CHECK-NEXT: vand.vv v8, v9, v8 2794; CHECK-NEXT: vsrl.vi v9, v8, 1 2795; CHECK-NEXT: vand.vx v9, v9, a0 2796; CHECK-NEXT: lui a0, 209715 2797; CHECK-NEXT: addi a0, a0, 819 2798; CHECK-NEXT: vsub.vv v8, v8, v9 2799; CHECK-NEXT: vand.vx v9, v8, a0 2800; CHECK-NEXT: vsrl.vi v8, v8, 2 2801; CHECK-NEXT: vand.vx v8, v8, a0 2802; CHECK-NEXT: lui a0, 61681 2803; CHECK-NEXT: addi a0, a0, -241 2804; CHECK-NEXT: vadd.vv v8, v9, v8 2805; CHECK-NEXT: vsrl.vi v9, v8, 4 2806; CHECK-NEXT: vadd.vv v8, v8, v9 2807; CHECK-NEXT: vand.vx v8, v8, a0 2808; CHECK-NEXT: lui a0, 4112 2809; CHECK-NEXT: addi a0, a0, 257 2810; CHECK-NEXT: vmul.vx v8, v8, a0 2811; CHECK-NEXT: vsrl.vi v8, v8, 24 2812; CHECK-NEXT: ret 2813 %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl) 2814 ret <4 x i32> %v 2815} 2816 2817define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { 2818; CHECK-LABEL: vp_cttz_zero_undef_v8i32: 2819; CHECK: # %bb.0: 2820; CHECK-NEXT: li a1, 1 2821; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 2822; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t 2823; CHECK-NEXT: lui a0, 349525 2824; CHECK-NEXT: vnot.v v8, v8, v0.t 2825; CHECK-NEXT: addi a0, a0, 1365 2826; CHECK-NEXT: vand.vv v8, v8, v10, v0.t 2827; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 2828; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 2829; CHECK-NEXT: lui a0, 209715 2830; CHECK-NEXT: addi a0, a0, 819 2831; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 2832; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 2833; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2834; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2835; CHECK-NEXT: lui a0, 61681 2836; CHECK-NEXT: addi a0, a0, -241 2837; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 2838; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 2839; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 2840; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2841; CHECK-NEXT: lui a0, 4112 2842; CHECK-NEXT: addi a0, a0, 257 2843; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2844; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 2845; CHECK-NEXT: ret 2846 %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl) 2847 ret <8 x i32> %v 2848} 2849 2850define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { 2851; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked: 2852; CHECK: # %bb.0: 2853; CHECK-NEXT: li a1, 1 2854; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 2855; CHECK-NEXT: vnot.v v10, v8 2856; CHECK-NEXT: vsub.vx v8, v8, a1 2857; CHECK-NEXT: lui a0, 349525 2858; CHECK-NEXT: addi a0, a0, 1365 2859; CHECK-NEXT: vand.vv v8, v10, v8 2860; CHECK-NEXT: vsrl.vi v10, v8, 1 2861; CHECK-NEXT: vand.vx v10, v10, a0 2862; CHECK-NEXT: lui a0, 209715 2863; CHECK-NEXT: addi a0, a0, 819 2864; CHECK-NEXT: vsub.vv v8, v8, v10 2865; CHECK-NEXT: vand.vx v10, v8, a0 2866; CHECK-NEXT: vsrl.vi v8, v8, 2 2867; CHECK-NEXT: vand.vx v8, v8, a0 2868; CHECK-NEXT: lui a0, 61681 2869; CHECK-NEXT: addi a0, a0, -241 2870; CHECK-NEXT: vadd.vv v8, v10, v8 2871; CHECK-NEXT: vsrl.vi v10, v8, 4 2872; CHECK-NEXT: vadd.vv v8, v8, v10 2873; CHECK-NEXT: vand.vx v8, v8, a0 2874; CHECK-NEXT: lui a0, 4112 2875; CHECK-NEXT: addi a0, a0, 257 2876; CHECK-NEXT: vmul.vx v8, v8, a0 2877; CHECK-NEXT: vsrl.vi v8, v8, 24 2878; CHECK-NEXT: ret 2879 %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl) 2880 ret <8 x i32> %v 2881} 2882 2883define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { 2884; CHECK-LABEL: vp_cttz_zero_undef_v16i32: 2885; CHECK: # %bb.0: 2886; CHECK-NEXT: li a1, 1 2887; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 2888; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t 2889; CHECK-NEXT: lui a0, 349525 2890; CHECK-NEXT: vnot.v v8, v8, v0.t 2891; CHECK-NEXT: addi a0, a0, 1365 2892; CHECK-NEXT: vand.vv v8, v8, v12, v0.t 2893; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 2894; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 2895; CHECK-NEXT: lui a0, 209715 2896; CHECK-NEXT: addi a0, a0, 819 2897; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 2898; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 2899; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2900; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2901; CHECK-NEXT: lui a0, 61681 2902; CHECK-NEXT: addi a0, a0, -241 2903; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 2904; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 2905; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 2906; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2907; CHECK-NEXT: lui a0, 4112 2908; CHECK-NEXT: addi a0, a0, 257 2909; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2910; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 2911; CHECK-NEXT: ret 2912 %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl) 2913 ret <16 x i32> %v 2914} 2915 2916define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { 2917; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked: 2918; CHECK: # %bb.0: 2919; CHECK-NEXT: li a1, 1 2920; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 2921; CHECK-NEXT: vnot.v v12, v8 2922; CHECK-NEXT: vsub.vx v8, v8, a1 2923; CHECK-NEXT: lui a0, 349525 2924; CHECK-NEXT: addi a0, a0, 1365 2925; CHECK-NEXT: vand.vv v8, v12, v8 2926; CHECK-NEXT: vsrl.vi v12, v8, 1 2927; CHECK-NEXT: vand.vx v12, v12, a0 2928; CHECK-NEXT: lui a0, 209715 2929; CHECK-NEXT: addi a0, a0, 819 2930; CHECK-NEXT: vsub.vv v8, v8, v12 2931; CHECK-NEXT: vand.vx v12, v8, a0 2932; CHECK-NEXT: vsrl.vi v8, v8, 2 2933; CHECK-NEXT: vand.vx v8, v8, a0 2934; CHECK-NEXT: lui a0, 61681 2935; CHECK-NEXT: addi a0, a0, -241 2936; CHECK-NEXT: vadd.vv v8, v12, v8 2937; CHECK-NEXT: vsrl.vi v12, v8, 4 2938; CHECK-NEXT: vadd.vv v8, v8, v12 2939; CHECK-NEXT: vand.vx v8, v8, a0 2940; CHECK-NEXT: lui a0, 4112 2941; CHECK-NEXT: addi a0, a0, 257 2942; CHECK-NEXT: vmul.vx v8, v8, a0 2943; CHECK-NEXT: vsrl.vi v8, v8, 24 2944; CHECK-NEXT: ret 2945 %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl) 2946 ret <16 x i32> %v 2947} 2948 2949define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { 2950; RV32-LABEL: vp_cttz_zero_undef_v2i64: 2951; RV32: # %bb.0: 2952; RV32-NEXT: li a1, 1 2953; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 2954; RV32-NEXT: vsub.vx v9, v8, a1, v0.t 2955; RV32-NEXT: lui a1, 349525 2956; RV32-NEXT: addi a1, a1, 1365 2957; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2958; RV32-NEXT: vmv.v.x v10, a1 2959; RV32-NEXT: lui a1, 209715 2960; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 2961; RV32-NEXT: vnot.v v8, v8, v0.t 2962; RV32-NEXT: addi a1, a1, 819 2963; RV32-NEXT: vand.vv v8, v8, v9, v0.t 2964; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 2965; RV32-NEXT: vand.vv v9, v9, v10, v0.t 2966; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2967; RV32-NEXT: vmv.v.x v10, a1 2968; RV32-NEXT: lui a1, 61681 2969; RV32-NEXT: addi a1, a1, -241 2970; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 2971; RV32-NEXT: vsub.vv v8, v8, v9, v0.t 2972; RV32-NEXT: vand.vv v9, v8, v10, v0.t 2973; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 2974; RV32-NEXT: vand.vv v8, v8, v10, v0.t 2975; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2976; RV32-NEXT: vmv.v.x v10, a1 2977; RV32-NEXT: lui a1, 4112 2978; RV32-NEXT: addi a1, a1, 257 2979; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 2980; RV32-NEXT: vadd.vv v8, v9, v8, v0.t 2981; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t 2982; RV32-NEXT: vadd.vv v8, v8, v9, v0.t 2983; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2984; RV32-NEXT: vmv.v.x v9, a1 2985; RV32-NEXT: li a1, 56 2986; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 2987; RV32-NEXT: vand.vv v8, v8, v10, v0.t 2988; RV32-NEXT: vmul.vv v8, v8, v9, v0.t 2989; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 2990; RV32-NEXT: ret 2991; 2992; RV64-LABEL: vp_cttz_zero_undef_v2i64: 2993; RV64: # %bb.0: 2994; RV64-NEXT: li a1, 1 2995; RV64-NEXT: lui a2, 349525 2996; RV64-NEXT: lui a3, 209715 2997; RV64-NEXT: lui a4, 61681 2998; RV64-NEXT: lui a5, 4112 2999; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3000; RV64-NEXT: vsub.vx v9, v8, a1, v0.t 3001; RV64-NEXT: addiw a0, a2, 1365 3002; RV64-NEXT: addiw a1, a3, 819 3003; RV64-NEXT: addiw a2, a4, -241 3004; RV64-NEXT: addiw a3, a5, 257 3005; RV64-NEXT: slli a4, a0, 32 3006; RV64-NEXT: add a0, a0, a4 3007; RV64-NEXT: slli a4, a1, 32 3008; RV64-NEXT: add a1, a1, a4 3009; RV64-NEXT: slli a4, a2, 32 3010; RV64-NEXT: add a2, a2, a4 3011; RV64-NEXT: slli a4, a3, 32 3012; RV64-NEXT: add a3, a3, a4 3013; RV64-NEXT: vnot.v v8, v8, v0.t 3014; RV64-NEXT: vand.vv v8, v8, v9, v0.t 3015; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 3016; RV64-NEXT: vand.vx v9, v9, a0, v0.t 3017; RV64-NEXT: vsub.vv v8, v8, v9, v0.t 3018; RV64-NEXT: vand.vx v9, v8, a1, v0.t 3019; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 3020; RV64-NEXT: vand.vx v8, v8, a1, v0.t 3021; RV64-NEXT: vadd.vv v8, v9, v8, v0.t 3022; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 3023; RV64-NEXT: vadd.vv v8, v8, v9, v0.t 3024; RV64-NEXT: vand.vx v8, v8, a2, v0.t 3025; RV64-NEXT: li a0, 56 3026; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 3027; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 3028; RV64-NEXT: ret 3029 %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl) 3030 ret <2 x i64> %v 3031} 3032 3033define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { 3034; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked: 3035; RV32: # %bb.0: 3036; RV32-NEXT: li a1, 1 3037; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3038; RV32-NEXT: vnot.v v9, v8 3039; RV32-NEXT: vsub.vx v8, v8, a1 3040; RV32-NEXT: lui a1, 349525 3041; RV32-NEXT: addi a1, a1, 1365 3042; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3043; RV32-NEXT: vmv.v.x v10, a1 3044; RV32-NEXT: lui a1, 209715 3045; RV32-NEXT: addi a1, a1, 819 3046; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3047; RV32-NEXT: vand.vv v8, v9, v8 3048; RV32-NEXT: vsrl.vi v9, v8, 1 3049; RV32-NEXT: vand.vv v9, v9, v10 3050; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3051; RV32-NEXT: vmv.v.x v10, a1 3052; RV32-NEXT: lui a1, 61681 3053; RV32-NEXT: addi a1, a1, -241 3054; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3055; RV32-NEXT: vsub.vv v8, v8, v9 3056; RV32-NEXT: vand.vv v9, v8, v10 3057; RV32-NEXT: vsrl.vi v8, v8, 2 3058; RV32-NEXT: vand.vv v8, v8, v10 3059; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3060; RV32-NEXT: vmv.v.x v10, a1 3061; RV32-NEXT: lui a1, 4112 3062; RV32-NEXT: addi a1, a1, 257 3063; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3064; RV32-NEXT: vadd.vv v8, v9, v8 3065; RV32-NEXT: vsrl.vi v9, v8, 4 3066; RV32-NEXT: vadd.vv v8, v8, v9 3067; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3068; RV32-NEXT: vmv.v.x v9, a1 3069; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3070; RV32-NEXT: vand.vv v8, v8, v10 3071; RV32-NEXT: vmul.vv v8, v8, v9 3072; RV32-NEXT: li a0, 56 3073; RV32-NEXT: vsrl.vx v8, v8, a0 3074; RV32-NEXT: ret 3075; 3076; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked: 3077; RV64: # %bb.0: 3078; RV64-NEXT: li a1, 1 3079; RV64-NEXT: lui a2, 349525 3080; RV64-NEXT: lui a3, 209715 3081; RV64-NEXT: lui a4, 61681 3082; RV64-NEXT: lui a5, 4112 3083; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 3084; RV64-NEXT: vsub.vx v9, v8, a1 3085; RV64-NEXT: addiw a0, a2, 1365 3086; RV64-NEXT: addiw a1, a3, 819 3087; RV64-NEXT: addiw a2, a4, -241 3088; RV64-NEXT: addiw a3, a5, 257 3089; RV64-NEXT: slli a4, a0, 32 3090; RV64-NEXT: add a0, a0, a4 3091; RV64-NEXT: slli a4, a1, 32 3092; RV64-NEXT: add a1, a1, a4 3093; RV64-NEXT: slli a4, a2, 32 3094; RV64-NEXT: add a2, a2, a4 3095; RV64-NEXT: slli a4, a3, 32 3096; RV64-NEXT: add a3, a3, a4 3097; RV64-NEXT: vnot.v v8, v8 3098; RV64-NEXT: vand.vv v8, v8, v9 3099; RV64-NEXT: vsrl.vi v9, v8, 1 3100; RV64-NEXT: vand.vx v9, v9, a0 3101; RV64-NEXT: vsub.vv v8, v8, v9 3102; RV64-NEXT: vand.vx v9, v8, a1 3103; RV64-NEXT: vsrl.vi v8, v8, 2 3104; RV64-NEXT: vand.vx v8, v8, a1 3105; RV64-NEXT: vadd.vv v8, v9, v8 3106; RV64-NEXT: vsrl.vi v9, v8, 4 3107; RV64-NEXT: vadd.vv v8, v8, v9 3108; RV64-NEXT: vand.vx v8, v8, a2 3109; RV64-NEXT: vmul.vx v8, v8, a3 3110; RV64-NEXT: li a0, 56 3111; RV64-NEXT: vsrl.vx v8, v8, a0 3112; RV64-NEXT: ret 3113 %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl) 3114 ret <2 x i64> %v 3115} 3116 3117define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { 3118; RV32-LABEL: vp_cttz_zero_undef_v4i64: 3119; RV32: # %bb.0: 3120; RV32-NEXT: li a1, 1 3121; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3122; RV32-NEXT: vsub.vx v10, v8, a1, v0.t 3123; RV32-NEXT: lui a1, 349525 3124; RV32-NEXT: addi a1, a1, 1365 3125; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3126; RV32-NEXT: vmv.v.x v12, a1 3127; RV32-NEXT: lui a1, 209715 3128; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3129; RV32-NEXT: vnot.v v8, v8, v0.t 3130; RV32-NEXT: addi a1, a1, 819 3131; RV32-NEXT: vand.vv v8, v8, v10, v0.t 3132; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 3133; RV32-NEXT: vand.vv v10, v10, v12, v0.t 3134; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3135; RV32-NEXT: vmv.v.x v12, a1 3136; RV32-NEXT: lui a1, 61681 3137; RV32-NEXT: addi a1, a1, -241 3138; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3139; RV32-NEXT: vsub.vv v8, v8, v10, v0.t 3140; RV32-NEXT: vand.vv v10, v8, v12, v0.t 3141; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 3142; RV32-NEXT: vand.vv v8, v8, v12, v0.t 3143; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3144; RV32-NEXT: vmv.v.x v12, a1 3145; RV32-NEXT: lui a1, 4112 3146; RV32-NEXT: addi a1, a1, 257 3147; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3148; RV32-NEXT: vadd.vv v8, v10, v8, v0.t 3149; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 3150; RV32-NEXT: vadd.vv v8, v8, v10, v0.t 3151; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3152; RV32-NEXT: vmv.v.x v10, a1 3153; RV32-NEXT: li a1, 56 3154; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3155; RV32-NEXT: vand.vv v8, v8, v12, v0.t 3156; RV32-NEXT: vmul.vv v8, v8, v10, v0.t 3157; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 3158; RV32-NEXT: ret 3159; 3160; RV64-LABEL: vp_cttz_zero_undef_v4i64: 3161; RV64: # %bb.0: 3162; RV64-NEXT: li a1, 1 3163; RV64-NEXT: lui a2, 349525 3164; RV64-NEXT: lui a3, 209715 3165; RV64-NEXT: lui a4, 61681 3166; RV64-NEXT: lui a5, 4112 3167; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3168; RV64-NEXT: vsub.vx v10, v8, a1, v0.t 3169; RV64-NEXT: addiw a0, a2, 1365 3170; RV64-NEXT: addiw a1, a3, 819 3171; RV64-NEXT: addiw a2, a4, -241 3172; RV64-NEXT: addiw a3, a5, 257 3173; RV64-NEXT: slli a4, a0, 32 3174; RV64-NEXT: add a0, a0, a4 3175; RV64-NEXT: slli a4, a1, 32 3176; RV64-NEXT: add a1, a1, a4 3177; RV64-NEXT: slli a4, a2, 32 3178; RV64-NEXT: add a2, a2, a4 3179; RV64-NEXT: slli a4, a3, 32 3180; RV64-NEXT: add a3, a3, a4 3181; RV64-NEXT: vnot.v v8, v8, v0.t 3182; RV64-NEXT: vand.vv v8, v8, v10, v0.t 3183; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 3184; RV64-NEXT: vand.vx v10, v10, a0, v0.t 3185; RV64-NEXT: vsub.vv v8, v8, v10, v0.t 3186; RV64-NEXT: vand.vx v10, v8, a1, v0.t 3187; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 3188; RV64-NEXT: vand.vx v8, v8, a1, v0.t 3189; RV64-NEXT: vadd.vv v8, v10, v8, v0.t 3190; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 3191; RV64-NEXT: vadd.vv v8, v8, v10, v0.t 3192; RV64-NEXT: vand.vx v8, v8, a2, v0.t 3193; RV64-NEXT: li a0, 56 3194; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 3195; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 3196; RV64-NEXT: ret 3197 %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl) 3198 ret <4 x i64> %v 3199} 3200 3201define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { 3202; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked: 3203; RV32: # %bb.0: 3204; RV32-NEXT: li a1, 1 3205; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3206; RV32-NEXT: vnot.v v10, v8 3207; RV32-NEXT: vsub.vx v8, v8, a1 3208; RV32-NEXT: lui a1, 349525 3209; RV32-NEXT: addi a1, a1, 1365 3210; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3211; RV32-NEXT: vmv.v.x v12, a1 3212; RV32-NEXT: lui a1, 209715 3213; RV32-NEXT: addi a1, a1, 819 3214; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3215; RV32-NEXT: vand.vv v8, v10, v8 3216; RV32-NEXT: vsrl.vi v10, v8, 1 3217; RV32-NEXT: vand.vv v10, v10, v12 3218; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3219; RV32-NEXT: vmv.v.x v12, a1 3220; RV32-NEXT: lui a1, 61681 3221; RV32-NEXT: addi a1, a1, -241 3222; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3223; RV32-NEXT: vsub.vv v8, v8, v10 3224; RV32-NEXT: vand.vv v10, v8, v12 3225; RV32-NEXT: vsrl.vi v8, v8, 2 3226; RV32-NEXT: vand.vv v8, v8, v12 3227; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3228; RV32-NEXT: vmv.v.x v12, a1 3229; RV32-NEXT: lui a1, 4112 3230; RV32-NEXT: addi a1, a1, 257 3231; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3232; RV32-NEXT: vadd.vv v8, v10, v8 3233; RV32-NEXT: vsrl.vi v10, v8, 4 3234; RV32-NEXT: vadd.vv v8, v8, v10 3235; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 3236; RV32-NEXT: vmv.v.x v10, a1 3237; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3238; RV32-NEXT: vand.vv v8, v8, v12 3239; RV32-NEXT: vmul.vv v8, v8, v10 3240; RV32-NEXT: li a0, 56 3241; RV32-NEXT: vsrl.vx v8, v8, a0 3242; RV32-NEXT: ret 3243; 3244; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked: 3245; RV64: # %bb.0: 3246; RV64-NEXT: li a1, 1 3247; RV64-NEXT: lui a2, 349525 3248; RV64-NEXT: lui a3, 209715 3249; RV64-NEXT: lui a4, 61681 3250; RV64-NEXT: lui a5, 4112 3251; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 3252; RV64-NEXT: vsub.vx v10, v8, a1 3253; RV64-NEXT: addiw a0, a2, 1365 3254; RV64-NEXT: addiw a1, a3, 819 3255; RV64-NEXT: addiw a2, a4, -241 3256; RV64-NEXT: addiw a3, a5, 257 3257; RV64-NEXT: slli a4, a0, 32 3258; RV64-NEXT: add a0, a0, a4 3259; RV64-NEXT: slli a4, a1, 32 3260; RV64-NEXT: add a1, a1, a4 3261; RV64-NEXT: slli a4, a2, 32 3262; RV64-NEXT: add a2, a2, a4 3263; RV64-NEXT: slli a4, a3, 32 3264; RV64-NEXT: add a3, a3, a4 3265; RV64-NEXT: vnot.v v8, v8 3266; RV64-NEXT: vand.vv v8, v8, v10 3267; RV64-NEXT: vsrl.vi v10, v8, 1 3268; RV64-NEXT: vand.vx v10, v10, a0 3269; RV64-NEXT: vsub.vv v8, v8, v10 3270; RV64-NEXT: vand.vx v10, v8, a1 3271; RV64-NEXT: vsrl.vi v8, v8, 2 3272; RV64-NEXT: vand.vx v8, v8, a1 3273; RV64-NEXT: vadd.vv v8, v10, v8 3274; RV64-NEXT: vsrl.vi v10, v8, 4 3275; RV64-NEXT: vadd.vv v8, v8, v10 3276; RV64-NEXT: vand.vx v8, v8, a2 3277; RV64-NEXT: vmul.vx v8, v8, a3 3278; RV64-NEXT: li a0, 56 3279; RV64-NEXT: vsrl.vx v8, v8, a0 3280; RV64-NEXT: ret 3281 %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl) 3282 ret <4 x i64> %v 3283} 3284 3285define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { 3286; RV32-LABEL: vp_cttz_zero_undef_v8i64: 3287; RV32: # %bb.0: 3288; RV32-NEXT: li a1, 1 3289; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3290; RV32-NEXT: vsub.vx v12, v8, a1, v0.t 3291; RV32-NEXT: lui a1, 349525 3292; RV32-NEXT: addi a1, a1, 1365 3293; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3294; RV32-NEXT: vmv.v.x v16, a1 3295; RV32-NEXT: lui a1, 209715 3296; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3297; RV32-NEXT: vnot.v v8, v8, v0.t 3298; RV32-NEXT: addi a1, a1, 819 3299; RV32-NEXT: vand.vv v12, v8, v12, v0.t 3300; RV32-NEXT: vsrl.vi v8, v12, 1, v0.t 3301; RV32-NEXT: vand.vv v16, v8, v16, v0.t 3302; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3303; RV32-NEXT: vmv.v.x v8, a1 3304; RV32-NEXT: lui a1, 61681 3305; RV32-NEXT: addi a1, a1, -241 3306; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3307; RV32-NEXT: vsub.vv v12, v12, v16, v0.t 3308; RV32-NEXT: vand.vv v16, v12, v8, v0.t 3309; RV32-NEXT: vsrl.vi v12, v12, 2, v0.t 3310; RV32-NEXT: vand.vv v8, v12, v8, v0.t 3311; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3312; RV32-NEXT: vmv.v.x v12, a1 3313; RV32-NEXT: lui a1, 4112 3314; RV32-NEXT: addi a1, a1, 257 3315; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3316; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 3317; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 3318; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 3319; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3320; RV32-NEXT: vmv.v.x v16, a1 3321; RV32-NEXT: li a1, 56 3322; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3323; RV32-NEXT: vand.vv v8, v8, v12, v0.t 3324; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 3325; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 3326; RV32-NEXT: ret 3327; 3328; RV64-LABEL: vp_cttz_zero_undef_v8i64: 3329; RV64: # %bb.0: 3330; RV64-NEXT: li a1, 1 3331; RV64-NEXT: lui a2, 349525 3332; RV64-NEXT: lui a3, 209715 3333; RV64-NEXT: lui a4, 61681 3334; RV64-NEXT: lui a5, 4112 3335; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3336; RV64-NEXT: vsub.vx v12, v8, a1, v0.t 3337; RV64-NEXT: addiw a0, a2, 1365 3338; RV64-NEXT: addiw a1, a3, 819 3339; RV64-NEXT: addiw a2, a4, -241 3340; RV64-NEXT: addiw a3, a5, 257 3341; RV64-NEXT: slli a4, a0, 32 3342; RV64-NEXT: add a0, a0, a4 3343; RV64-NEXT: slli a4, a1, 32 3344; RV64-NEXT: add a1, a1, a4 3345; RV64-NEXT: slli a4, a2, 32 3346; RV64-NEXT: add a2, a2, a4 3347; RV64-NEXT: slli a4, a3, 32 3348; RV64-NEXT: add a3, a3, a4 3349; RV64-NEXT: vnot.v v8, v8, v0.t 3350; RV64-NEXT: vand.vv v8, v8, v12, v0.t 3351; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 3352; RV64-NEXT: vand.vx v12, v12, a0, v0.t 3353; RV64-NEXT: vsub.vv v8, v8, v12, v0.t 3354; RV64-NEXT: vand.vx v12, v8, a1, v0.t 3355; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 3356; RV64-NEXT: vand.vx v8, v8, a1, v0.t 3357; RV64-NEXT: vadd.vv v8, v12, v8, v0.t 3358; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 3359; RV64-NEXT: vadd.vv v8, v8, v12, v0.t 3360; RV64-NEXT: vand.vx v8, v8, a2, v0.t 3361; RV64-NEXT: li a0, 56 3362; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 3363; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 3364; RV64-NEXT: ret 3365 %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl) 3366 ret <8 x i64> %v 3367} 3368 3369define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { 3370; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked: 3371; RV32: # %bb.0: 3372; RV32-NEXT: li a1, 1 3373; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3374; RV32-NEXT: vnot.v v12, v8 3375; RV32-NEXT: vsub.vx v8, v8, a1 3376; RV32-NEXT: lui a1, 349525 3377; RV32-NEXT: addi a1, a1, 1365 3378; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3379; RV32-NEXT: vmv.v.x v16, a1 3380; RV32-NEXT: lui a1, 209715 3381; RV32-NEXT: addi a1, a1, 819 3382; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3383; RV32-NEXT: vand.vv v8, v12, v8 3384; RV32-NEXT: vsrl.vi v12, v8, 1 3385; RV32-NEXT: vand.vv v12, v12, v16 3386; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3387; RV32-NEXT: vmv.v.x v16, a1 3388; RV32-NEXT: lui a1, 61681 3389; RV32-NEXT: addi a1, a1, -241 3390; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3391; RV32-NEXT: vsub.vv v8, v8, v12 3392; RV32-NEXT: vand.vv v12, v8, v16 3393; RV32-NEXT: vsrl.vi v8, v8, 2 3394; RV32-NEXT: vand.vv v8, v8, v16 3395; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3396; RV32-NEXT: vmv.v.x v16, a1 3397; RV32-NEXT: lui a1, 4112 3398; RV32-NEXT: addi a1, a1, 257 3399; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3400; RV32-NEXT: vadd.vv v8, v12, v8 3401; RV32-NEXT: vsrl.vi v12, v8, 4 3402; RV32-NEXT: vadd.vv v8, v8, v12 3403; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 3404; RV32-NEXT: vmv.v.x v12, a1 3405; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3406; RV32-NEXT: vand.vv v8, v8, v16 3407; RV32-NEXT: vmul.vv v8, v8, v12 3408; RV32-NEXT: li a0, 56 3409; RV32-NEXT: vsrl.vx v8, v8, a0 3410; RV32-NEXT: ret 3411; 3412; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked: 3413; RV64: # %bb.0: 3414; RV64-NEXT: li a1, 1 3415; RV64-NEXT: lui a2, 349525 3416; RV64-NEXT: lui a3, 209715 3417; RV64-NEXT: lui a4, 61681 3418; RV64-NEXT: lui a5, 4112 3419; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 3420; RV64-NEXT: vsub.vx v12, v8, a1 3421; RV64-NEXT: addiw a0, a2, 1365 3422; RV64-NEXT: addiw a1, a3, 819 3423; RV64-NEXT: addiw a2, a4, -241 3424; RV64-NEXT: addiw a3, a5, 257 3425; RV64-NEXT: slli a4, a0, 32 3426; RV64-NEXT: add a0, a0, a4 3427; RV64-NEXT: slli a4, a1, 32 3428; RV64-NEXT: add a1, a1, a4 3429; RV64-NEXT: slli a4, a2, 32 3430; RV64-NEXT: add a2, a2, a4 3431; RV64-NEXT: slli a4, a3, 32 3432; RV64-NEXT: add a3, a3, a4 3433; RV64-NEXT: vnot.v v8, v8 3434; RV64-NEXT: vand.vv v8, v8, v12 3435; RV64-NEXT: vsrl.vi v12, v8, 1 3436; RV64-NEXT: vand.vx v12, v12, a0 3437; RV64-NEXT: vsub.vv v8, v8, v12 3438; RV64-NEXT: vand.vx v12, v8, a1 3439; RV64-NEXT: vsrl.vi v8, v8, 2 3440; RV64-NEXT: vand.vx v8, v8, a1 3441; RV64-NEXT: vadd.vv v8, v12, v8 3442; RV64-NEXT: vsrl.vi v12, v8, 4 3443; RV64-NEXT: vadd.vv v8, v8, v12 3444; RV64-NEXT: vand.vx v8, v8, a2 3445; RV64-NEXT: vmul.vx v8, v8, a3 3446; RV64-NEXT: li a0, 56 3447; RV64-NEXT: vsrl.vx v8, v8, a0 3448; RV64-NEXT: ret 3449 %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl) 3450 ret <8 x i64> %v 3451} 3452 3453define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { 3454; RV32-LABEL: vp_cttz_zero_undef_v15i64: 3455; RV32: # %bb.0: 3456; RV32-NEXT: addi sp, sp, -48 3457; RV32-NEXT: .cfi_def_cfa_offset 48 3458; RV32-NEXT: csrr a1, vlenb 3459; RV32-NEXT: slli a1, a1, 4 3460; RV32-NEXT: sub sp, sp, a1 3461; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb 3462; RV32-NEXT: li a1, 1 3463; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3464; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 3465; RV32-NEXT: lui a1, 349525 3466; RV32-NEXT: addi a1, a1, 1365 3467; RV32-NEXT: sw a1, 40(sp) 3468; RV32-NEXT: sw a1, 44(sp) 3469; RV32-NEXT: lui a1, 209715 3470; RV32-NEXT: addi a1, a1, 819 3471; RV32-NEXT: sw a1, 32(sp) 3472; RV32-NEXT: sw a1, 36(sp) 3473; RV32-NEXT: lui a1, 61681 3474; RV32-NEXT: addi a1, a1, -241 3475; RV32-NEXT: sw a1, 24(sp) 3476; RV32-NEXT: sw a1, 28(sp) 3477; RV32-NEXT: lui a1, 4112 3478; RV32-NEXT: addi a1, a1, 257 3479; RV32-NEXT: sw a1, 16(sp) 3480; RV32-NEXT: sw a1, 20(sp) 3481; RV32-NEXT: addi a1, sp, 40 3482; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3483; RV32-NEXT: vlse64.v v24, (a1), zero 3484; RV32-NEXT: csrr a1, vlenb 3485; RV32-NEXT: slli a1, a1, 3 3486; RV32-NEXT: add a1, sp, a1 3487; RV32-NEXT: addi a1, a1, 48 3488; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 3489; RV32-NEXT: addi a1, sp, 32 3490; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3491; RV32-NEXT: vnot.v v8, v8, v0.t 3492; RV32-NEXT: vand.vv v16, v8, v16, v0.t 3493; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 3494; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3495; RV32-NEXT: vlse64.v v24, (a1), zero 3496; RV32-NEXT: addi a1, sp, 48 3497; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 3498; RV32-NEXT: csrr a1, vlenb 3499; RV32-NEXT: slli a1, a1, 3 3500; RV32-NEXT: add a1, sp, a1 3501; RV32-NEXT: addi a1, a1, 48 3502; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 3503; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3504; RV32-NEXT: vand.vv v8, v8, v24, v0.t 3505; RV32-NEXT: addi a1, sp, 24 3506; RV32-NEXT: vsub.vv v8, v16, v8, v0.t 3507; RV32-NEXT: addi a2, sp, 48 3508; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload 3509; RV32-NEXT: vand.vv v16, v8, v24, v0.t 3510; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 3511; RV32-NEXT: vand.vv v24, v8, v24, v0.t 3512; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3513; RV32-NEXT: vlse64.v v8, (a1), zero 3514; RV32-NEXT: csrr a1, vlenb 3515; RV32-NEXT: slli a1, a1, 3 3516; RV32-NEXT: add a1, sp, a1 3517; RV32-NEXT: addi a1, a1, 48 3518; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 3519; RV32-NEXT: addi a1, sp, 16 3520; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3521; RV32-NEXT: vadd.vv v24, v16, v24, v0.t 3522; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3523; RV32-NEXT: vlse64.v v16, (a1), zero 3524; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3525; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 3526; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 3527; RV32-NEXT: li a0, 56 3528; RV32-NEXT: csrr a1, vlenb 3529; RV32-NEXT: slli a1, a1, 3 3530; RV32-NEXT: add a1, sp, a1 3531; RV32-NEXT: addi a1, a1, 48 3532; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 3533; RV32-NEXT: vand.vv v8, v8, v24, v0.t 3534; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 3535; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 3536; RV32-NEXT: csrr a0, vlenb 3537; RV32-NEXT: slli a0, a0, 4 3538; RV32-NEXT: add sp, sp, a0 3539; RV32-NEXT: .cfi_def_cfa sp, 48 3540; RV32-NEXT: addi sp, sp, 48 3541; RV32-NEXT: .cfi_def_cfa_offset 0 3542; RV32-NEXT: ret 3543; 3544; RV64-LABEL: vp_cttz_zero_undef_v15i64: 3545; RV64: # %bb.0: 3546; RV64-NEXT: li a1, 1 3547; RV64-NEXT: lui a2, 349525 3548; RV64-NEXT: lui a3, 209715 3549; RV64-NEXT: lui a4, 61681 3550; RV64-NEXT: lui a5, 4112 3551; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3552; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 3553; RV64-NEXT: addiw a0, a2, 1365 3554; RV64-NEXT: addiw a1, a3, 819 3555; RV64-NEXT: addiw a2, a4, -241 3556; RV64-NEXT: addiw a3, a5, 257 3557; RV64-NEXT: slli a4, a0, 32 3558; RV64-NEXT: add a0, a0, a4 3559; RV64-NEXT: slli a4, a1, 32 3560; RV64-NEXT: add a1, a1, a4 3561; RV64-NEXT: slli a4, a2, 32 3562; RV64-NEXT: add a2, a2, a4 3563; RV64-NEXT: slli a4, a3, 32 3564; RV64-NEXT: add a3, a3, a4 3565; RV64-NEXT: vnot.v v8, v8, v0.t 3566; RV64-NEXT: vand.vv v8, v8, v16, v0.t 3567; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 3568; RV64-NEXT: vand.vx v16, v16, a0, v0.t 3569; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 3570; RV64-NEXT: vand.vx v16, v8, a1, v0.t 3571; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 3572; RV64-NEXT: vand.vx v8, v8, a1, v0.t 3573; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 3574; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 3575; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 3576; RV64-NEXT: vand.vx v8, v8, a2, v0.t 3577; RV64-NEXT: li a0, 56 3578; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 3579; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 3580; RV64-NEXT: ret 3581 %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl) 3582 ret <15 x i64> %v 3583} 3584 3585define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { 3586; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked: 3587; RV32: # %bb.0: 3588; RV32-NEXT: addi sp, sp, -32 3589; RV32-NEXT: .cfi_def_cfa_offset 32 3590; RV32-NEXT: li a1, 1 3591; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3592; RV32-NEXT: vsub.vx v16, v8, a1 3593; RV32-NEXT: lui a1, 349525 3594; RV32-NEXT: addi a1, a1, 1365 3595; RV32-NEXT: sw a1, 24(sp) 3596; RV32-NEXT: sw a1, 28(sp) 3597; RV32-NEXT: lui a1, 209715 3598; RV32-NEXT: addi a1, a1, 819 3599; RV32-NEXT: sw a1, 16(sp) 3600; RV32-NEXT: sw a1, 20(sp) 3601; RV32-NEXT: lui a1, 61681 3602; RV32-NEXT: addi a1, a1, -241 3603; RV32-NEXT: sw a1, 8(sp) 3604; RV32-NEXT: sw a1, 12(sp) 3605; RV32-NEXT: lui a1, 4112 3606; RV32-NEXT: vnot.v v8, v8 3607; RV32-NEXT: addi a1, a1, 257 3608; RV32-NEXT: sw a1, 0(sp) 3609; RV32-NEXT: sw a1, 4(sp) 3610; RV32-NEXT: addi a1, sp, 24 3611; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3612; RV32-NEXT: vlse64.v v0, (a1), zero 3613; RV32-NEXT: addi a1, sp, 16 3614; RV32-NEXT: vlse64.v v24, (a1), zero 3615; RV32-NEXT: addi a1, sp, 8 3616; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3617; RV32-NEXT: vand.vv v16, v8, v16 3618; RV32-NEXT: vsrl.vi v8, v16, 1 3619; RV32-NEXT: vand.vv v0, v8, v0 3620; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3621; RV32-NEXT: vlse64.v v8, (a1), zero 3622; RV32-NEXT: mv a1, sp 3623; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3624; RV32-NEXT: vsub.vv v16, v16, v0 3625; RV32-NEXT: vand.vv v0, v16, v24 3626; RV32-NEXT: vsrl.vi v16, v16, 2 3627; RV32-NEXT: vand.vv v16, v16, v24 3628; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3629; RV32-NEXT: vlse64.v v24, (a1), zero 3630; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3631; RV32-NEXT: vadd.vv v16, v0, v16 3632; RV32-NEXT: vsrl.vi v0, v16, 4 3633; RV32-NEXT: vadd.vv v16, v16, v0 3634; RV32-NEXT: vand.vv v8, v16, v8 3635; RV32-NEXT: vmul.vv v8, v8, v24 3636; RV32-NEXT: li a0, 56 3637; RV32-NEXT: vsrl.vx v8, v8, a0 3638; RV32-NEXT: addi sp, sp, 32 3639; RV32-NEXT: .cfi_def_cfa_offset 0 3640; RV32-NEXT: ret 3641; 3642; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked: 3643; RV64: # %bb.0: 3644; RV64-NEXT: li a1, 1 3645; RV64-NEXT: lui a2, 349525 3646; RV64-NEXT: lui a3, 209715 3647; RV64-NEXT: lui a4, 61681 3648; RV64-NEXT: lui a5, 4112 3649; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3650; RV64-NEXT: vsub.vx v16, v8, a1 3651; RV64-NEXT: addiw a0, a2, 1365 3652; RV64-NEXT: addiw a1, a3, 819 3653; RV64-NEXT: addiw a2, a4, -241 3654; RV64-NEXT: addiw a3, a5, 257 3655; RV64-NEXT: slli a4, a0, 32 3656; RV64-NEXT: add a0, a0, a4 3657; RV64-NEXT: slli a4, a1, 32 3658; RV64-NEXT: add a1, a1, a4 3659; RV64-NEXT: slli a4, a2, 32 3660; RV64-NEXT: add a2, a2, a4 3661; RV64-NEXT: slli a4, a3, 32 3662; RV64-NEXT: add a3, a3, a4 3663; RV64-NEXT: vnot.v v8, v8 3664; RV64-NEXT: vand.vv v8, v8, v16 3665; RV64-NEXT: vsrl.vi v16, v8, 1 3666; RV64-NEXT: vand.vx v16, v16, a0 3667; RV64-NEXT: vsub.vv v8, v8, v16 3668; RV64-NEXT: vand.vx v16, v8, a1 3669; RV64-NEXT: vsrl.vi v8, v8, 2 3670; RV64-NEXT: vand.vx v8, v8, a1 3671; RV64-NEXT: vadd.vv v8, v16, v8 3672; RV64-NEXT: vsrl.vi v16, v8, 4 3673; RV64-NEXT: vadd.vv v8, v8, v16 3674; RV64-NEXT: vand.vx v8, v8, a2 3675; RV64-NEXT: vmul.vx v8, v8, a3 3676; RV64-NEXT: li a0, 56 3677; RV64-NEXT: vsrl.vx v8, v8, a0 3678; RV64-NEXT: ret 3679 %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl) 3680 ret <15 x i64> %v 3681} 3682 3683define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { 3684; RV32-LABEL: vp_cttz_zero_undef_v16i64: 3685; RV32: # %bb.0: 3686; RV32-NEXT: addi sp, sp, -48 3687; RV32-NEXT: .cfi_def_cfa_offset 48 3688; RV32-NEXT: csrr a1, vlenb 3689; RV32-NEXT: slli a1, a1, 4 3690; RV32-NEXT: sub sp, sp, a1 3691; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb 3692; RV32-NEXT: li a1, 1 3693; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3694; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 3695; RV32-NEXT: lui a1, 349525 3696; RV32-NEXT: addi a1, a1, 1365 3697; RV32-NEXT: sw a1, 40(sp) 3698; RV32-NEXT: sw a1, 44(sp) 3699; RV32-NEXT: lui a1, 209715 3700; RV32-NEXT: addi a1, a1, 819 3701; RV32-NEXT: sw a1, 32(sp) 3702; RV32-NEXT: sw a1, 36(sp) 3703; RV32-NEXT: lui a1, 61681 3704; RV32-NEXT: addi a1, a1, -241 3705; RV32-NEXT: sw a1, 24(sp) 3706; RV32-NEXT: sw a1, 28(sp) 3707; RV32-NEXT: lui a1, 4112 3708; RV32-NEXT: addi a1, a1, 257 3709; RV32-NEXT: sw a1, 16(sp) 3710; RV32-NEXT: sw a1, 20(sp) 3711; RV32-NEXT: addi a1, sp, 40 3712; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3713; RV32-NEXT: vlse64.v v24, (a1), zero 3714; RV32-NEXT: csrr a1, vlenb 3715; RV32-NEXT: slli a1, a1, 3 3716; RV32-NEXT: add a1, sp, a1 3717; RV32-NEXT: addi a1, a1, 48 3718; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 3719; RV32-NEXT: addi a1, sp, 32 3720; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3721; RV32-NEXT: vnot.v v8, v8, v0.t 3722; RV32-NEXT: vand.vv v16, v8, v16, v0.t 3723; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 3724; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3725; RV32-NEXT: vlse64.v v24, (a1), zero 3726; RV32-NEXT: addi a1, sp, 48 3727; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill 3728; RV32-NEXT: csrr a1, vlenb 3729; RV32-NEXT: slli a1, a1, 3 3730; RV32-NEXT: add a1, sp, a1 3731; RV32-NEXT: addi a1, a1, 48 3732; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 3733; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3734; RV32-NEXT: vand.vv v8, v8, v24, v0.t 3735; RV32-NEXT: addi a1, sp, 24 3736; RV32-NEXT: vsub.vv v8, v16, v8, v0.t 3737; RV32-NEXT: addi a2, sp, 48 3738; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload 3739; RV32-NEXT: vand.vv v16, v8, v24, v0.t 3740; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 3741; RV32-NEXT: vand.vv v24, v8, v24, v0.t 3742; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3743; RV32-NEXT: vlse64.v v8, (a1), zero 3744; RV32-NEXT: csrr a1, vlenb 3745; RV32-NEXT: slli a1, a1, 3 3746; RV32-NEXT: add a1, sp, a1 3747; RV32-NEXT: addi a1, a1, 48 3748; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 3749; RV32-NEXT: addi a1, sp, 16 3750; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3751; RV32-NEXT: vadd.vv v24, v16, v24, v0.t 3752; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3753; RV32-NEXT: vlse64.v v16, (a1), zero 3754; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3755; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 3756; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 3757; RV32-NEXT: li a0, 56 3758; RV32-NEXT: csrr a1, vlenb 3759; RV32-NEXT: slli a1, a1, 3 3760; RV32-NEXT: add a1, sp, a1 3761; RV32-NEXT: addi a1, a1, 48 3762; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 3763; RV32-NEXT: vand.vv v8, v8, v24, v0.t 3764; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 3765; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 3766; RV32-NEXT: csrr a0, vlenb 3767; RV32-NEXT: slli a0, a0, 4 3768; RV32-NEXT: add sp, sp, a0 3769; RV32-NEXT: .cfi_def_cfa sp, 48 3770; RV32-NEXT: addi sp, sp, 48 3771; RV32-NEXT: .cfi_def_cfa_offset 0 3772; RV32-NEXT: ret 3773; 3774; RV64-LABEL: vp_cttz_zero_undef_v16i64: 3775; RV64: # %bb.0: 3776; RV64-NEXT: li a1, 1 3777; RV64-NEXT: lui a2, 349525 3778; RV64-NEXT: lui a3, 209715 3779; RV64-NEXT: lui a4, 61681 3780; RV64-NEXT: lui a5, 4112 3781; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3782; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 3783; RV64-NEXT: addiw a0, a2, 1365 3784; RV64-NEXT: addiw a1, a3, 819 3785; RV64-NEXT: addiw a2, a4, -241 3786; RV64-NEXT: addiw a3, a5, 257 3787; RV64-NEXT: slli a4, a0, 32 3788; RV64-NEXT: add a0, a0, a4 3789; RV64-NEXT: slli a4, a1, 32 3790; RV64-NEXT: add a1, a1, a4 3791; RV64-NEXT: slli a4, a2, 32 3792; RV64-NEXT: add a2, a2, a4 3793; RV64-NEXT: slli a4, a3, 32 3794; RV64-NEXT: add a3, a3, a4 3795; RV64-NEXT: vnot.v v8, v8, v0.t 3796; RV64-NEXT: vand.vv v8, v8, v16, v0.t 3797; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 3798; RV64-NEXT: vand.vx v16, v16, a0, v0.t 3799; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 3800; RV64-NEXT: vand.vx v16, v8, a1, v0.t 3801; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 3802; RV64-NEXT: vand.vx v8, v8, a1, v0.t 3803; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 3804; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 3805; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 3806; RV64-NEXT: vand.vx v8, v8, a2, v0.t 3807; RV64-NEXT: li a0, 56 3808; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 3809; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 3810; RV64-NEXT: ret 3811 %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl) 3812 ret <16 x i64> %v 3813} 3814 3815define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { 3816; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked: 3817; RV32: # %bb.0: 3818; RV32-NEXT: addi sp, sp, -32 3819; RV32-NEXT: .cfi_def_cfa_offset 32 3820; RV32-NEXT: li a1, 1 3821; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3822; RV32-NEXT: vsub.vx v16, v8, a1 3823; RV32-NEXT: lui a1, 349525 3824; RV32-NEXT: addi a1, a1, 1365 3825; RV32-NEXT: sw a1, 24(sp) 3826; RV32-NEXT: sw a1, 28(sp) 3827; RV32-NEXT: lui a1, 209715 3828; RV32-NEXT: addi a1, a1, 819 3829; RV32-NEXT: sw a1, 16(sp) 3830; RV32-NEXT: sw a1, 20(sp) 3831; RV32-NEXT: lui a1, 61681 3832; RV32-NEXT: addi a1, a1, -241 3833; RV32-NEXT: sw a1, 8(sp) 3834; RV32-NEXT: sw a1, 12(sp) 3835; RV32-NEXT: lui a1, 4112 3836; RV32-NEXT: vnot.v v8, v8 3837; RV32-NEXT: addi a1, a1, 257 3838; RV32-NEXT: sw a1, 0(sp) 3839; RV32-NEXT: sw a1, 4(sp) 3840; RV32-NEXT: addi a1, sp, 24 3841; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3842; RV32-NEXT: vlse64.v v0, (a1), zero 3843; RV32-NEXT: addi a1, sp, 16 3844; RV32-NEXT: vlse64.v v24, (a1), zero 3845; RV32-NEXT: addi a1, sp, 8 3846; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3847; RV32-NEXT: vand.vv v16, v8, v16 3848; RV32-NEXT: vsrl.vi v8, v16, 1 3849; RV32-NEXT: vand.vv v0, v8, v0 3850; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3851; RV32-NEXT: vlse64.v v8, (a1), zero 3852; RV32-NEXT: mv a1, sp 3853; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3854; RV32-NEXT: vsub.vv v16, v16, v0 3855; RV32-NEXT: vand.vv v0, v16, v24 3856; RV32-NEXT: vsrl.vi v16, v16, 2 3857; RV32-NEXT: vand.vv v16, v16, v24 3858; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3859; RV32-NEXT: vlse64.v v24, (a1), zero 3860; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3861; RV32-NEXT: vadd.vv v16, v0, v16 3862; RV32-NEXT: vsrl.vi v0, v16, 4 3863; RV32-NEXT: vadd.vv v16, v16, v0 3864; RV32-NEXT: vand.vv v8, v16, v8 3865; RV32-NEXT: vmul.vv v8, v8, v24 3866; RV32-NEXT: li a0, 56 3867; RV32-NEXT: vsrl.vx v8, v8, a0 3868; RV32-NEXT: addi sp, sp, 32 3869; RV32-NEXT: .cfi_def_cfa_offset 0 3870; RV32-NEXT: ret 3871; 3872; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked: 3873; RV64: # %bb.0: 3874; RV64-NEXT: li a1, 1 3875; RV64-NEXT: lui a2, 349525 3876; RV64-NEXT: lui a3, 209715 3877; RV64-NEXT: lui a4, 61681 3878; RV64-NEXT: lui a5, 4112 3879; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 3880; RV64-NEXT: vsub.vx v16, v8, a1 3881; RV64-NEXT: addiw a0, a2, 1365 3882; RV64-NEXT: addiw a1, a3, 819 3883; RV64-NEXT: addiw a2, a4, -241 3884; RV64-NEXT: addiw a3, a5, 257 3885; RV64-NEXT: slli a4, a0, 32 3886; RV64-NEXT: add a0, a0, a4 3887; RV64-NEXT: slli a4, a1, 32 3888; RV64-NEXT: add a1, a1, a4 3889; RV64-NEXT: slli a4, a2, 32 3890; RV64-NEXT: add a2, a2, a4 3891; RV64-NEXT: slli a4, a3, 32 3892; RV64-NEXT: add a3, a3, a4 3893; RV64-NEXT: vnot.v v8, v8 3894; RV64-NEXT: vand.vv v8, v8, v16 3895; RV64-NEXT: vsrl.vi v16, v8, 1 3896; RV64-NEXT: vand.vx v16, v16, a0 3897; RV64-NEXT: vsub.vv v8, v8, v16 3898; RV64-NEXT: vand.vx v16, v8, a1 3899; RV64-NEXT: vsrl.vi v8, v8, 2 3900; RV64-NEXT: vand.vx v8, v8, a1 3901; RV64-NEXT: vadd.vv v8, v16, v8 3902; RV64-NEXT: vsrl.vi v16, v8, 4 3903; RV64-NEXT: vadd.vv v8, v8, v16 3904; RV64-NEXT: vand.vx v8, v8, a2 3905; RV64-NEXT: vmul.vx v8, v8, a3 3906; RV64-NEXT: li a0, 56 3907; RV64-NEXT: vsrl.vx v8, v8, a0 3908; RV64-NEXT: ret 3909 %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl) 3910 ret <16 x i64> %v 3911} 3912 3913define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { 3914; RV32-LABEL: vp_cttz_zero_undef_v32i64: 3915; RV32: # %bb.0: 3916; RV32-NEXT: addi sp, sp, -48 3917; RV32-NEXT: .cfi_def_cfa_offset 48 3918; RV32-NEXT: csrr a1, vlenb 3919; RV32-NEXT: li a2, 48 3920; RV32-NEXT: mul a1, a1, a2 3921; RV32-NEXT: sub sp, sp, a1 3922; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb 3923; RV32-NEXT: csrr a1, vlenb 3924; RV32-NEXT: slli a1, a1, 4 3925; RV32-NEXT: add a1, sp, a1 3926; RV32-NEXT: addi a1, a1, 48 3927; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 3928; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 3929; RV32-NEXT: vslidedown.vi v7, v0, 2 3930; RV32-NEXT: lui a1, 349525 3931; RV32-NEXT: lui a2, 209715 3932; RV32-NEXT: addi a1, a1, 1365 3933; RV32-NEXT: sw a1, 40(sp) 3934; RV32-NEXT: sw a1, 44(sp) 3935; RV32-NEXT: lui a1, 61681 3936; RV32-NEXT: addi a2, a2, 819 3937; RV32-NEXT: sw a2, 32(sp) 3938; RV32-NEXT: sw a2, 36(sp) 3939; RV32-NEXT: lui a2, 4112 3940; RV32-NEXT: addi a1, a1, -241 3941; RV32-NEXT: sw a1, 24(sp) 3942; RV32-NEXT: sw a1, 28(sp) 3943; RV32-NEXT: li a1, 16 3944; RV32-NEXT: addi a2, a2, 257 3945; RV32-NEXT: sw a2, 16(sp) 3946; RV32-NEXT: sw a2, 20(sp) 3947; RV32-NEXT: mv a2, a0 3948; RV32-NEXT: bltu a0, a1, .LBB70_2 3949; RV32-NEXT: # %bb.1: 3950; RV32-NEXT: li a2, 16 3951; RV32-NEXT: .LBB70_2: 3952; RV32-NEXT: li a1, 1 3953; RV32-NEXT: addi a3, sp, 40 3954; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 3955; RV32-NEXT: vsub.vx v16, v8, a1, v0.t 3956; RV32-NEXT: vnot.v v8, v8, v0.t 3957; RV32-NEXT: vand.vv v8, v8, v16, v0.t 3958; RV32-NEXT: csrr a4, vlenb 3959; RV32-NEXT: li a5, 24 3960; RV32-NEXT: mul a4, a4, a5 3961; RV32-NEXT: add a4, sp, a4 3962; RV32-NEXT: addi a4, a4, 48 3963; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 3964; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 3965; RV32-NEXT: vlse64.v v8, (a3), zero 3966; RV32-NEXT: csrr a3, vlenb 3967; RV32-NEXT: li a4, 40 3968; RV32-NEXT: mul a3, a3, a4 3969; RV32-NEXT: add a3, sp, a3 3970; RV32-NEXT: addi a3, a3, 48 3971; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 3972; RV32-NEXT: addi a3, sp, 32 3973; RV32-NEXT: vlse64.v v8, (a3), zero 3974; RV32-NEXT: csrr a3, vlenb 3975; RV32-NEXT: slli a3, a3, 5 3976; RV32-NEXT: add a3, sp, a3 3977; RV32-NEXT: addi a3, a3, 48 3978; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 3979; RV32-NEXT: csrr a3, vlenb 3980; RV32-NEXT: li a4, 24 3981; RV32-NEXT: mul a3, a3, a4 3982; RV32-NEXT: add a3, sp, a3 3983; RV32-NEXT: addi a3, a3, 48 3984; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 3985; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 3986; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t 3987; RV32-NEXT: csrr a3, vlenb 3988; RV32-NEXT: li a4, 40 3989; RV32-NEXT: mul a3, a3, a4 3990; RV32-NEXT: add a3, sp, a3 3991; RV32-NEXT: addi a3, a3, 48 3992; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 3993; RV32-NEXT: vand.vv v24, v24, v16, v0.t 3994; RV32-NEXT: csrr a3, vlenb 3995; RV32-NEXT: li a4, 24 3996; RV32-NEXT: mul a3, a3, a4 3997; RV32-NEXT: add a3, sp, a3 3998; RV32-NEXT: addi a3, a3, 48 3999; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 4000; RV32-NEXT: vsub.vv v24, v16, v24, v0.t 4001; RV32-NEXT: vand.vv v16, v24, v8, v0.t 4002; RV32-NEXT: csrr a3, vlenb 4003; RV32-NEXT: li a4, 24 4004; RV32-NEXT: mul a3, a3, a4 4005; RV32-NEXT: add a3, sp, a3 4006; RV32-NEXT: addi a3, a3, 48 4007; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 4008; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t 4009; RV32-NEXT: vand.vv v16, v16, v8, v0.t 4010; RV32-NEXT: csrr a3, vlenb 4011; RV32-NEXT: li a4, 24 4012; RV32-NEXT: mul a3, a3, a4 4013; RV32-NEXT: add a3, sp, a3 4014; RV32-NEXT: addi a3, a3, 48 4015; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 4016; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 4017; RV32-NEXT: csrr a3, vlenb 4018; RV32-NEXT: slli a3, a3, 3 4019; RV32-NEXT: add a3, sp, a3 4020; RV32-NEXT: addi a3, a3, 48 4021; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 4022; RV32-NEXT: addi a3, sp, 24 4023; RV32-NEXT: addi a4, sp, 16 4024; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 4025; RV32-NEXT: vlse64.v v16, (a3), zero 4026; RV32-NEXT: addi a3, sp, 48 4027; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 4028; RV32-NEXT: vlse64.v v8, (a4), zero 4029; RV32-NEXT: csrr a3, vlenb 4030; RV32-NEXT: li a4, 24 4031; RV32-NEXT: mul a3, a3, a4 4032; RV32-NEXT: add a3, sp, a3 4033; RV32-NEXT: addi a3, a3, 48 4034; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 4035; RV32-NEXT: csrr a3, vlenb 4036; RV32-NEXT: slli a3, a3, 3 4037; RV32-NEXT: add a3, sp, a3 4038; RV32-NEXT: addi a3, a3, 48 4039; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 4040; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 4041; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t 4042; RV32-NEXT: vadd.vv v8, v8, v24, v0.t 4043; RV32-NEXT: vand.vv v16, v8, v16, v0.t 4044; RV32-NEXT: csrr a2, vlenb 4045; RV32-NEXT: li a3, 24 4046; RV32-NEXT: mul a2, a2, a3 4047; RV32-NEXT: add a2, sp, a2 4048; RV32-NEXT: addi a2, a2, 48 4049; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 4050; RV32-NEXT: vmul.vv v8, v16, v8, v0.t 4051; RV32-NEXT: li a2, 56 4052; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t 4053; RV32-NEXT: csrr a3, vlenb 4054; RV32-NEXT: slli a3, a3, 3 4055; RV32-NEXT: add a3, sp, a3 4056; RV32-NEXT: addi a3, a3, 48 4057; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 4058; RV32-NEXT: addi a3, a0, -16 4059; RV32-NEXT: sltu a0, a0, a3 4060; RV32-NEXT: addi a0, a0, -1 4061; RV32-NEXT: and a0, a0, a3 4062; RV32-NEXT: vmv1r.v v0, v7 4063; RV32-NEXT: csrr a3, vlenb 4064; RV32-NEXT: slli a3, a3, 4 4065; RV32-NEXT: add a3, sp, a3 4066; RV32-NEXT: addi a3, a3, 48 4067; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload 4068; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4069; RV32-NEXT: vsub.vx v8, v16, a1, v0.t 4070; RV32-NEXT: vnot.v v16, v16, v0.t 4071; RV32-NEXT: vand.vv v8, v16, v8, v0.t 4072; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t 4073; RV32-NEXT: csrr a0, vlenb 4074; RV32-NEXT: li a1, 40 4075; RV32-NEXT: mul a0, a0, a1 4076; RV32-NEXT: add a0, sp, a0 4077; RV32-NEXT: addi a0, a0, 48 4078; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4079; RV32-NEXT: vand.vv v16, v24, v16, v0.t 4080; RV32-NEXT: vsub.vv v24, v8, v16, v0.t 4081; RV32-NEXT: csrr a0, vlenb 4082; RV32-NEXT: slli a0, a0, 5 4083; RV32-NEXT: add a0, sp, a0 4084; RV32-NEXT: addi a0, a0, 48 4085; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 4086; RV32-NEXT: vand.vv v16, v24, v8, v0.t 4087; RV32-NEXT: csrr a0, vlenb 4088; RV32-NEXT: li a1, 40 4089; RV32-NEXT: mul a0, a0, a1 4090; RV32-NEXT: add a0, sp, a0 4091; RV32-NEXT: addi a0, a0, 48 4092; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 4093; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t 4094; RV32-NEXT: csrr a0, vlenb 4095; RV32-NEXT: slli a0, a0, 5 4096; RV32-NEXT: add a0, sp, a0 4097; RV32-NEXT: addi a0, a0, 48 4098; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4099; RV32-NEXT: vand.vv v8, v8, v16, v0.t 4100; RV32-NEXT: csrr a0, vlenb 4101; RV32-NEXT: li a1, 40 4102; RV32-NEXT: mul a0, a0, a1 4103; RV32-NEXT: add a0, sp, a0 4104; RV32-NEXT: addi a0, a0, 48 4105; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4106; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 4107; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 4108; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 4109; RV32-NEXT: addi a0, sp, 48 4110; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4111; RV32-NEXT: vand.vv v8, v8, v16, v0.t 4112; RV32-NEXT: csrr a0, vlenb 4113; RV32-NEXT: li a1, 24 4114; RV32-NEXT: mul a0, a0, a1 4115; RV32-NEXT: add a0, sp, a0 4116; RV32-NEXT: addi a0, a0, 48 4117; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 4118; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 4119; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t 4120; RV32-NEXT: csrr a0, vlenb 4121; RV32-NEXT: slli a0, a0, 3 4122; RV32-NEXT: add a0, sp, a0 4123; RV32-NEXT: addi a0, a0, 48 4124; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 4125; RV32-NEXT: csrr a0, vlenb 4126; RV32-NEXT: li a1, 48 4127; RV32-NEXT: mul a0, a0, a1 4128; RV32-NEXT: add sp, sp, a0 4129; RV32-NEXT: .cfi_def_cfa sp, 48 4130; RV32-NEXT: addi sp, sp, 48 4131; RV32-NEXT: .cfi_def_cfa_offset 0 4132; RV32-NEXT: ret 4133; 4134; RV64-LABEL: vp_cttz_zero_undef_v32i64: 4135; RV64: # %bb.0: 4136; RV64-NEXT: addi sp, sp, -16 4137; RV64-NEXT: .cfi_def_cfa_offset 16 4138; RV64-NEXT: csrr a1, vlenb 4139; RV64-NEXT: slli a1, a1, 4 4140; RV64-NEXT: sub sp, sp, a1 4141; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 4142; RV64-NEXT: csrr a1, vlenb 4143; RV64-NEXT: slli a1, a1, 3 4144; RV64-NEXT: add a1, sp, a1 4145; RV64-NEXT: addi a1, a1, 16 4146; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 4147; RV64-NEXT: li a1, 16 4148; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma 4149; RV64-NEXT: vslidedown.vi v24, v0, 2 4150; RV64-NEXT: mv a4, a0 4151; RV64-NEXT: bltu a0, a1, .LBB70_2 4152; RV64-NEXT: # %bb.1: 4153; RV64-NEXT: li a4, 16 4154; RV64-NEXT: .LBB70_2: 4155; RV64-NEXT: li a1, 1 4156; RV64-NEXT: lui a2, 349525 4157; RV64-NEXT: lui a3, 209715 4158; RV64-NEXT: lui a5, 61681 4159; RV64-NEXT: lui a6, 4112 4160; RV64-NEXT: addiw a2, a2, 1365 4161; RV64-NEXT: addiw a3, a3, 819 4162; RV64-NEXT: addiw a7, a5, -241 4163; RV64-NEXT: addiw t0, a6, 257 4164; RV64-NEXT: slli a6, a2, 32 4165; RV64-NEXT: add a6, a2, a6 4166; RV64-NEXT: slli a5, a3, 32 4167; RV64-NEXT: add a5, a3, a5 4168; RV64-NEXT: slli a2, a7, 32 4169; RV64-NEXT: add a2, a7, a2 4170; RV64-NEXT: slli a3, t0, 32 4171; RV64-NEXT: add a3, t0, a3 4172; RV64-NEXT: addi a7, a0, -16 4173; RV64-NEXT: sltu a0, a0, a7 4174; RV64-NEXT: addi a0, a0, -1 4175; RV64-NEXT: and a7, a0, a7 4176; RV64-NEXT: li a0, 56 4177; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma 4178; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 4179; RV64-NEXT: vnot.v v8, v8, v0.t 4180; RV64-NEXT: vand.vv v8, v8, v16, v0.t 4181; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 4182; RV64-NEXT: vand.vx v16, v16, a6, v0.t 4183; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 4184; RV64-NEXT: vand.vx v16, v8, a5, v0.t 4185; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 4186; RV64-NEXT: vand.vx v8, v8, a5, v0.t 4187; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 4188; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 4189; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 4190; RV64-NEXT: vand.vx v8, v8, a2, v0.t 4191; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 4192; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 4193; RV64-NEXT: addi a4, sp, 16 4194; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill 4195; RV64-NEXT: vmv1r.v v0, v24 4196; RV64-NEXT: csrr a4, vlenb 4197; RV64-NEXT: slli a4, a4, 3 4198; RV64-NEXT: add a4, sp, a4 4199; RV64-NEXT: addi a4, a4, 16 4200; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload 4201; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma 4202; RV64-NEXT: vsub.vx v16, v8, a1, v0.t 4203; RV64-NEXT: vnot.v v8, v8, v0.t 4204; RV64-NEXT: vand.vv v8, v8, v16, v0.t 4205; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 4206; RV64-NEXT: vand.vx v16, v16, a6, v0.t 4207; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 4208; RV64-NEXT: vand.vx v16, v8, a5, v0.t 4209; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 4210; RV64-NEXT: vand.vx v8, v8, a5, v0.t 4211; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 4212; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 4213; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 4214; RV64-NEXT: vand.vx v8, v8, a2, v0.t 4215; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 4216; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t 4217; RV64-NEXT: addi a0, sp, 16 4218; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 4219; RV64-NEXT: csrr a0, vlenb 4220; RV64-NEXT: slli a0, a0, 4 4221; RV64-NEXT: add sp, sp, a0 4222; RV64-NEXT: .cfi_def_cfa sp, 16 4223; RV64-NEXT: addi sp, sp, 16 4224; RV64-NEXT: .cfi_def_cfa_offset 0 4225; RV64-NEXT: ret 4226 %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl) 4227 ret <32 x i64> %v 4228} 4229 4230define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { 4231; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked: 4232; RV32: # %bb.0: 4233; RV32-NEXT: addi sp, sp, -48 4234; RV32-NEXT: .cfi_def_cfa_offset 48 4235; RV32-NEXT: csrr a1, vlenb 4236; RV32-NEXT: slli a1, a1, 3 4237; RV32-NEXT: sub sp, sp, a1 4238; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb 4239; RV32-NEXT: lui a1, 349525 4240; RV32-NEXT: lui a2, 209715 4241; RV32-NEXT: addi a1, a1, 1365 4242; RV32-NEXT: sw a1, 40(sp) 4243; RV32-NEXT: sw a1, 44(sp) 4244; RV32-NEXT: lui a1, 61681 4245; RV32-NEXT: addi a2, a2, 819 4246; RV32-NEXT: sw a2, 32(sp) 4247; RV32-NEXT: sw a2, 36(sp) 4248; RV32-NEXT: lui a2, 4112 4249; RV32-NEXT: addi a1, a1, -241 4250; RV32-NEXT: sw a1, 24(sp) 4251; RV32-NEXT: sw a1, 28(sp) 4252; RV32-NEXT: li a3, 16 4253; RV32-NEXT: addi a1, a2, 257 4254; RV32-NEXT: sw a1, 16(sp) 4255; RV32-NEXT: sw a1, 20(sp) 4256; RV32-NEXT: mv a1, a0 4257; RV32-NEXT: bltu a0, a3, .LBB71_2 4258; RV32-NEXT: # %bb.1: 4259; RV32-NEXT: li a1, 16 4260; RV32-NEXT: .LBB71_2: 4261; RV32-NEXT: li a2, 1 4262; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4263; RV32-NEXT: vnot.v v0, v8 4264; RV32-NEXT: addi a3, sp, 40 4265; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 4266; RV32-NEXT: vlse64.v v24, (a3), zero 4267; RV32-NEXT: addi a3, a0, -16 4268; RV32-NEXT: sltu a0, a0, a3 4269; RV32-NEXT: addi a0, a0, -1 4270; RV32-NEXT: and a0, a0, a3 4271; RV32-NEXT: addi a3, sp, 32 4272; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4273; RV32-NEXT: vsub.vx v8, v8, a2 4274; RV32-NEXT: vand.vv v8, v0, v8 4275; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4276; RV32-NEXT: vsub.vx v0, v16, a2 4277; RV32-NEXT: vnot.v v16, v16 4278; RV32-NEXT: vand.vv v16, v16, v0 4279; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4280; RV32-NEXT: vsrl.vi v0, v8, 1 4281; RV32-NEXT: vand.vv v0, v0, v24 4282; RV32-NEXT: vsub.vv v0, v8, v0 4283; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4284; RV32-NEXT: vsrl.vi v8, v16, 1 4285; RV32-NEXT: vand.vv v24, v8, v24 4286; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 4287; RV32-NEXT: vlse64.v v8, (a3), zero 4288; RV32-NEXT: addi a2, sp, 24 4289; RV32-NEXT: addi a3, sp, 16 4290; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4291; RV32-NEXT: vsub.vv v16, v16, v24 4292; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4293; RV32-NEXT: vand.vv v24, v0, v8 4294; RV32-NEXT: vsrl.vi v0, v0, 2 4295; RV32-NEXT: vand.vv v0, v0, v8 4296; RV32-NEXT: vadd.vv v24, v24, v0 4297; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4298; RV32-NEXT: vand.vv v0, v16, v8 4299; RV32-NEXT: vsrl.vi v16, v16, 2 4300; RV32-NEXT: vand.vv v8, v16, v8 4301; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4302; RV32-NEXT: vsrl.vi v16, v24, 4 4303; RV32-NEXT: vadd.vv v16, v24, v16 4304; RV32-NEXT: addi a4, sp, 48 4305; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill 4306; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 4307; RV32-NEXT: vlse64.v v24, (a2), zero 4308; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4309; RV32-NEXT: vadd.vv v8, v0, v8 4310; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 4311; RV32-NEXT: vlse64.v v0, (a3), zero 4312; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4313; RV32-NEXT: vsrl.vi v16, v8, 4 4314; RV32-NEXT: vadd.vv v8, v8, v16 4315; RV32-NEXT: addi a2, sp, 48 4316; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload 4317; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4318; RV32-NEXT: vand.vv v16, v16, v24 4319; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4320; RV32-NEXT: vand.vv v8, v8, v24 4321; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4322; RV32-NEXT: vmul.vv v16, v16, v0 4323; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4324; RV32-NEXT: vmul.vv v24, v8, v0 4325; RV32-NEXT: li a2, 56 4326; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4327; RV32-NEXT: vsrl.vx v8, v16, a2 4328; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4329; RV32-NEXT: vsrl.vx v16, v24, a2 4330; RV32-NEXT: csrr a0, vlenb 4331; RV32-NEXT: slli a0, a0, 3 4332; RV32-NEXT: add sp, sp, a0 4333; RV32-NEXT: .cfi_def_cfa sp, 48 4334; RV32-NEXT: addi sp, sp, 48 4335; RV32-NEXT: .cfi_def_cfa_offset 0 4336; RV32-NEXT: ret 4337; 4338; RV64-LABEL: vp_cttz_zero_undef_v32i64_unmasked: 4339; RV64: # %bb.0: 4340; RV64-NEXT: li a2, 16 4341; RV64-NEXT: mv a1, a0 4342; RV64-NEXT: bltu a0, a2, .LBB71_2 4343; RV64-NEXT: # %bb.1: 4344; RV64-NEXT: li a1, 16 4345; RV64-NEXT: .LBB71_2: 4346; RV64-NEXT: li a2, 1 4347; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4348; RV64-NEXT: vnot.v v24, v8 4349; RV64-NEXT: lui a3, 349525 4350; RV64-NEXT: lui a4, 209715 4351; RV64-NEXT: lui a5, 61681 4352; RV64-NEXT: lui a6, 4112 4353; RV64-NEXT: addiw a3, a3, 1365 4354; RV64-NEXT: addiw a4, a4, 819 4355; RV64-NEXT: addiw a5, a5, -241 4356; RV64-NEXT: addiw a6, a6, 257 4357; RV64-NEXT: slli a7, a3, 32 4358; RV64-NEXT: add a3, a3, a7 4359; RV64-NEXT: slli a7, a4, 32 4360; RV64-NEXT: add a4, a4, a7 4361; RV64-NEXT: slli a7, a5, 32 4362; RV64-NEXT: add a5, a5, a7 4363; RV64-NEXT: slli a7, a6, 32 4364; RV64-NEXT: add a6, a6, a7 4365; RV64-NEXT: addi a7, a0, -16 4366; RV64-NEXT: sltu a0, a0, a7 4367; RV64-NEXT: addi a0, a0, -1 4368; RV64-NEXT: and a0, a0, a7 4369; RV64-NEXT: li a7, 56 4370; RV64-NEXT: vsub.vx v8, v8, a2 4371; RV64-NEXT: vand.vv v8, v24, v8 4372; RV64-NEXT: vsrl.vi v24, v8, 1 4373; RV64-NEXT: vand.vx v24, v24, a3 4374; RV64-NEXT: vsub.vv v8, v8, v24 4375; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4376; RV64-NEXT: vsub.vx v24, v16, a2 4377; RV64-NEXT: vnot.v v16, v16 4378; RV64-NEXT: vand.vv v16, v16, v24 4379; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4380; RV64-NEXT: vand.vx v24, v8, a4 4381; RV64-NEXT: vsrl.vi v8, v8, 2 4382; RV64-NEXT: vand.vx v8, v8, a4 4383; RV64-NEXT: vadd.vv v8, v24, v8 4384; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4385; RV64-NEXT: vsrl.vi v24, v16, 1 4386; RV64-NEXT: vand.vx v24, v24, a3 4387; RV64-NEXT: vsub.vv v16, v16, v24 4388; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4389; RV64-NEXT: vsrl.vi v24, v8, 4 4390; RV64-NEXT: vadd.vv v8, v8, v24 4391; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4392; RV64-NEXT: vand.vx v24, v16, a4 4393; RV64-NEXT: vsrl.vi v16, v16, 2 4394; RV64-NEXT: vand.vx v16, v16, a4 4395; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4396; RV64-NEXT: vand.vx v8, v8, a5 4397; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4398; RV64-NEXT: vadd.vv v16, v24, v16 4399; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4400; RV64-NEXT: vmul.vx v8, v8, a6 4401; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4402; RV64-NEXT: vsrl.vi v24, v16, 4 4403; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 4404; RV64-NEXT: vsrl.vx v8, v8, a7 4405; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 4406; RV64-NEXT: vadd.vv v16, v16, v24 4407; RV64-NEXT: vand.vx v16, v16, a5 4408; RV64-NEXT: vmul.vx v16, v16, a6 4409; RV64-NEXT: vsrl.vx v16, v16, a7 4410; RV64-NEXT: ret 4411 %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl) 4412 ret <32 x i64> %v 4413} 4414