; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

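; Note on the expected lowering (summarizing the autogenerated checks below):
; with only +v,+m there is no single ctlz instruction (Zvbb's vclz.v is not
; enabled), so llvm.vp.ctlz is expanded inline. A shift/or ladder
; (x |= x>>1, x>>2, ... up to half the element width) smears the leading one
; bit rightward, the result is inverted so that only the original leading
; zeros remain set, and those bits are counted with the usual SWAR popcount
; (0x55.., 0x33.., 0x0f.. masks). The masked variants keep every step under
; the v0.t mask; the _unmasked variants use an all-ones mask.
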
declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)

define <2 x i8> @vp_ctlz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctlz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

declare <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32)

define <4 x i8> @vp_ctlz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctlz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32)

define <8 x i8> @vp_ctlz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctlz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

declare <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32)

define <16 x i8> @vp_ctlz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vi v8, v8, 15, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i8_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    li a0, 85
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    li a0, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

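; From i16 upward the per-byte popcounts are accumulated by multiplying with
; the 0x0101.. constant and shifting the sum down from the top byte, instead
; of the single vand.vi 15 that suffices for i8. The 16-bit masks below are
; built with lui/addi pairs (e.g. lui 5 + addi 1365 = 0x5555).
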
declare <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32)

define <2 x i16> @vp_ctlz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

declare <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16>, i1 immarg, <4 x i1>, i32)

define <4 x i16> @vp_ctlz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32)

define <8 x i16> @vp_ctlz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

declare <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32)

define <16 x i16> @vp_ctlz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

declare <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32)

define <2 x i32> @vp_ctlz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

declare <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32)

define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32)

define <8 x i32> @vp_ctlz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

declare <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32)

define <16 x i32> @vp_ctlz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

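; For i64 elements the 64-bit popcount constants can no longer be encoded
; directly. RV64 materializes each one in a scalar register (lui/addiw, then
; slli/add to replicate the low 32 bits into the high half), so the checks
; diverge per target from here on. RV32 has no 64-bit scalar registers, so
; the same 32-bit pattern is splat with vmv.v.x under a temporary e32
; vsetvli at twice the element count and then reused as an e64 operand.
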
declare <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)

define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vx v9, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vx v9, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)

define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vx v10, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vx v10, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)

define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vnot.v v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v12, v16, v12, v0.t
; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vx v12, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vx v12, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

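; At LMUL=8 (the v15i64/v16i64 cases below) the RV32 lowering runs short of
; vector registers: the 64-bit splat constants are written to the stack as
; sw pairs and reloaded as stride-0 vlse64.v splats, and in the masked case
; whole register groups are additionally spilled and reloaded around the
; expansion (the vs8r.v/vl8r.v "Folded Spill"/"Folded Reload" lines).
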
declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)

define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
RV64-NEXT: add a1, a1, a4 1620; RV64-NEXT: slli a4, a2, 32 1621; RV64-NEXT: add a2, a2, a4 1622; RV64-NEXT: slli a4, a3, 32 1623; RV64-NEXT: add a3, a3, a4 1624; RV64-NEXT: li a4, 32 1625; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1626; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t 1627; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1628; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1629; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1630; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t 1631; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1632; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t 1633; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1634; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t 1635; RV64-NEXT: vor.vv v8, v8, v16, v0.t 1636; RV64-NEXT: vnot.v v8, v8, v0.t 1637; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1638; RV64-NEXT: vand.vx v16, v16, a0, v0.t 1639; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1640; RV64-NEXT: vand.vx v16, v8, a1, v0.t 1641; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1642; RV64-NEXT: vand.vx v8, v8, a1, v0.t 1643; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1644; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1645; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1646; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1647; RV64-NEXT: li a0, 56 1648; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 1649; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1650; RV64-NEXT: ret 1651 %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) 1652 ret <15 x i64> %v 1653} 1654 1655define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { 1656; RV32-LABEL: vp_ctlz_v15i64_unmasked: 1657; RV32: # %bb.0: 1658; RV32-NEXT: addi sp, sp, -32 1659; RV32-NEXT: .cfi_def_cfa_offset 32 1660; RV32-NEXT: lui a1, 349525 1661; RV32-NEXT: addi a1, a1, 1365 1662; RV32-NEXT: sw a1, 24(sp) 1663; RV32-NEXT: sw a1, 28(sp) 1664; RV32-NEXT: lui a1, 209715 1665; RV32-NEXT: addi a1, a1, 819 1666; RV32-NEXT: sw a1, 16(sp) 1667; RV32-NEXT: sw a1, 20(sp) 1668; RV32-NEXT: lui a1, 61681 1669; RV32-NEXT: addi a1, a1, -241 1670; RV32-NEXT: sw a1, 8(sp) 1671; RV32-NEXT: sw a1, 12(sp) 1672; RV32-NEXT: lui a1, 4112 1673; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1674; RV32-NEXT: vsrl.vi v0, v8, 1 1675; RV32-NEXT: addi a1, a1, 257 1676; RV32-NEXT: sw a1, 0(sp) 1677; RV32-NEXT: sw a1, 4(sp) 1678; RV32-NEXT: addi a1, sp, 24 1679; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1680; RV32-NEXT: vlse64.v v24, (a1), zero 1681; RV32-NEXT: addi a1, sp, 16 1682; RV32-NEXT: vlse64.v v16, (a1), zero 1683; RV32-NEXT: li a1, 32 1684; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1685; RV32-NEXT: vor.vv v8, v8, v0 1686; RV32-NEXT: vsrl.vi v0, v8, 2 1687; RV32-NEXT: vor.vv v8, v8, v0 1688; RV32-NEXT: vsrl.vi v0, v8, 4 1689; RV32-NEXT: vor.vv v8, v8, v0 1690; RV32-NEXT: vsrl.vi v0, v8, 8 1691; RV32-NEXT: vor.vv v8, v8, v0 1692; RV32-NEXT: vsrl.vi v0, v8, 16 1693; RV32-NEXT: vor.vv v8, v8, v0 1694; RV32-NEXT: vsrl.vx v0, v8, a1 1695; RV32-NEXT: addi a1, sp, 8 1696; RV32-NEXT: vor.vv v8, v8, v0 1697; RV32-NEXT: vnot.v v0, v8 1698; RV32-NEXT: vsrl.vi v8, v0, 1 1699; RV32-NEXT: vand.vv v24, v8, v24 1700; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1701; RV32-NEXT: vlse64.v v8, (a1), zero 1702; RV32-NEXT: mv a1, sp 1703; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1704; RV32-NEXT: vsub.vv v24, v0, v24 1705; RV32-NEXT: vand.vv v0, v24, v16 1706; RV32-NEXT: vsrl.vi v24, v24, 2 1707; RV32-NEXT: vand.vv v16, v24, v16 1708; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1709; RV32-NEXT: vlse64.v v24, (a1), zero 1710; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1711; RV32-NEXT: vadd.vv v16, v0, v16 1712; RV32-NEXT: vsrl.vi v0, v16, 4 1713; RV32-NEXT: 
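
; NOTE: At LMUL=8 the masked popcount keeps several full m8 register groups
; live at once, so the RV32 code above spills the vlse64-splatted constants
; with vs8r.v and reloads them with vl8r.v (the "Unknown-size Folded
; Spill/Reload" slots), addressing them relative to sp plus a multiple of
; vlenb because the register group size is not known at compile time.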

define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v15i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: addi a1, a1, 257
; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 16
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vx v0, v8, a1
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vnot.v v0, v8
; RV32-NEXT: vsrl.vi v8, v0, 1
; RV32-NEXT: vand.vv v24, v8, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v24, v0, v24
; RV32-NEXT: vand.vv v0, v24, v16
; RV32-NEXT: vsrl.vi v24, v24, 2
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v16, v0, v16
; RV32-NEXT: vsrl.vi v0, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v0
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vmul.vv v8, v8, v24
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v15i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 61681
; RV64-NEXT: lui a3, 4112
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, -241
; RV64-NEXT: addiw a3, a3, 257
; RV64-NEXT: slli a4, a0, 32
; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a2, 32
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: li a4, 32
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 8
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 16
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vx v16, v8, a4
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: vand.vx v16, v8, a1
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vmul.vx v8, v8, a3
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}
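
; NOTE: Even though the result type is <15 x i64>, the constant splats above
; are emitted under "vsetivli zero, 16, e64, m8"; the type is widened to a
; 16-element container, and only the operations governed by the EVL in a0
; are limited to the requested element count.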

declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)

define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: addi a1, a1, 257
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: addi a1, sp, 32
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vnot.v v16, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v24, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v24, v8, v24, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v24, v16, v24, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v16i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 61681
; RV64-NEXT: lui a3, 4112
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, -241
; RV64-NEXT: addiw a3, a3, 257
; RV64-NEXT: slli a4, a0, 32
; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a2, 32
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: li a4, 32
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a0, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a1, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a1, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: li a0, 56
; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}
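
; NOTE: The <16 x i64> masked sequence above is instruction-for-instruction
; identical to the <15 x i64> one: both types occupy the same m8 register
; group, so only the IR-level types differ.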

define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: lui a1, 4112
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: addi a1, a1, 257
; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: sw a1, 4(sp)
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 16
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vx v0, v8, a1
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vnot.v v0, v8
; RV32-NEXT: vsrl.vi v8, v0, 1
; RV32-NEXT: vand.vv v24, v8, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a1), zero
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v24, v0, v24
; RV32-NEXT: vand.vv v0, v24, v16
; RV32-NEXT: vsrl.vi v24, v24, 2
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v16, v0, v16
; RV32-NEXT: vsrl.vi v0, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v0
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vmul.vv v8, v8, v24
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v16i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: lui a0, 349525
; RV64-NEXT: lui a1, 209715
; RV64-NEXT: lui a2, 61681
; RV64-NEXT: lui a3, 4112
; RV64-NEXT: addiw a0, a0, 1365
; RV64-NEXT: addiw a1, a1, 819
; RV64-NEXT: addiw a2, a2, -241
; RV64-NEXT: addiw a3, a3, 257
; RV64-NEXT: slli a4, a0, 32
; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: slli a4, a1, 32
; RV64-NEXT: add a1, a1, a4
; RV64-NEXT: slli a4, a2, 32
; RV64-NEXT: add a2, a2, a4
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a3, a3, a4
; RV64-NEXT: li a4, 32
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 2
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 8
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vi v16, v8, 16
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vsrl.vx v16, v8, a4
; RV64-NEXT: vor.vv v8, v8, v16
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: vand.vx v16, v16, a0
; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: vand.vx v16, v8, a1
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a1
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vmul.vx v8, v8, a3
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}
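
; NOTE: <32 x i64> does not fit in one m8 register group, so the lowering
; below splits the operation into two 16-element halves. The first half runs
; with min(evl, 16), produced by the bltu/li clamp, and the second half with
; max(evl - 16, 0), computed branchlessly: "addi a3, a0, -16" wraps for
; evl < 16, "sltu/addi -1" turns that into an all-zero or all-one mask, and
; "and" selects either 0 or evl - 16.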

declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)

define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 56
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 16
; RV32-NEXT: addi a2, a2, 257
; RV32-NEXT: sw a2, 16(sp)
; RV32-NEXT: sw a2, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a1, .LBB34_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a2, 16
; RV32-NEXT: .LBB34_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: li a1, 32
; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: addi a4, sp, 32
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a3, a3, a5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vnot.v v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 48
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 48
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, sp, 24
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a3, a3, a5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a0, -16
; RV32-NEXT: sltu a0, a0, a3
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 56
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 16
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB34_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB34_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: li a1, 32
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: lui a3, 209715
; RV64-NEXT: lui a4, 61681
; RV64-NEXT: lui a5, 4112
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: addiw a3, a3, 819
; RV64-NEXT: addiw a6, a4, -241
; RV64-NEXT: addiw a7, a5, 257
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a5, a2, a5
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a4, a3, a4
; RV64-NEXT: slli a2, a6, 32
; RV64-NEXT: add a2, a6, a2
; RV64-NEXT: slli a3, a7, 32
; RV64-NEXT: add a3, a7, a3
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a6, a0, a6
; RV64-NEXT: li a0, 56
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a4, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a4, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: addi a7, sp, 16
; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: slli a7, a7, 3
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vor.vv v16, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a4, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a4, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}
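
; NOTE: In the unmasked split below there is no mask register pressure, so
; the two halves are interleaved instead of emitted back to back: the
; schedule alternates "vsetvli zero, a1" (low half) and "vsetvli zero, a0"
; (high half) between steps of the same algorithm.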

define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v32i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: addi a1, a2, 257
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a3, .LBB35_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB35_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: li a2, 32
; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a0, -16
; RV32-NEXT: sltu a0, a0, a3
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 1
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 2
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 16
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 4
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v8, a2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 8
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vnot.v v0, v8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 16
; RV32-NEXT: vor.vv v16, v16, v8
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v0, 1
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v8, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v24, v0, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v16, a2
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v0, v24, v8
; RV32-NEXT: vsrl.vi v24, v24, 2
; RV32-NEXT: vand.vv v24, v24, v8
; RV32-NEXT: vadd.vv v24, v0, v24
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vnot.v v16, v16
; RV32-NEXT: vsrl.vi v0, v16, 1
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v0, v0, v24
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsub.vv v0, v16, v0
; RV32-NEXT: addi a4, sp, 48
; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v24, 4
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v0, v8
; RV32-NEXT: vsrl.vi v0, v0, 2
; RV32-NEXT: vand.vv v8, v0, v8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v8, v24, v8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a3), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v16, v16, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v8, v16, a2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v24, a2
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v32i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 16
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB35_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB35_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: li a2, 32
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: lui a4, 209715
; RV64-NEXT: lui a5, 61681
; RV64-NEXT: lui a6, 4112
; RV64-NEXT: addiw a7, a3, 1365
; RV64-NEXT: addiw a3, a4, 819
; RV64-NEXT: addiw a4, a5, -241
; RV64-NEXT: addiw a6, a6, 257
; RV64-NEXT: slli a5, a7, 32
; RV64-NEXT: add a7, a7, a5
; RV64-NEXT: slli a5, a3, 32
; RV64-NEXT: add a5, a3, a5
; RV64-NEXT: slli a3, a4, 32
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: slli a4, a6, 32
; RV64-NEXT: add a4, a6, a4
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a6, a0, a6
; RV64-NEXT: li a0, 56
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 2
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 8
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 2
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 16
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v24, v8, a2
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 8
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 16
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: vand.vx v24, v24, a7
; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v24, v16, a2
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v8, a5
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vnot.v v16, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v8, v8, a5
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vand.vx v24, v24, a7
; RV64-NEXT: vsub.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v16, a5
; RV64-NEXT: vsrl.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v16, a5
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vadd.vv v16, v16, v24
; RV64-NEXT: vand.vx v16, v16, a3
; RV64-NEXT: vmul.vx v16, v16, a4
; RV64-NEXT: vsrl.vx v16, v16, a0
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}
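
; NOTE: The zero_undef tests below pass i1 true for the is_zero_poison
; operand. The generated code is identical to the non-poison variants above;
; the shift-and-or smearing sequence is already well defined for zero, so the
; flag currently enables no shorter sequence here.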

define <2 x i8> @vp_ctlz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i8> @vp_ctlz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9
; CHECK-NEXT: vand.vx v9, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

define <4 x i8> @vp_ctlz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vp_ctlz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9
; CHECK-NEXT: vand.vx v9, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

define <8 x i8> @vp_ctlz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vp_ctlz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9
; CHECK-NEXT: vand.vx v9, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

define <16 x i8> @vp_ctlz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT: vand.vx v9, v9, a0, v0.t
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vx v9, v8, a0, v0.t
; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT: vand.vx v8, v8, a0, v0.t
; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT: vand.vi v8, v8, 15, v0.t
; CHECK-NEXT: ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i8_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: li a0, 85
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 2
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: vnot.v v8, v8
; CHECK-NEXT: vsrl.vi v9, v8, 1
; CHECK-NEXT: vand.vx v9, v9, a0
; CHECK-NEXT: li a0, 51
; CHECK-NEXT: vsub.vv v8, v8, v9
; CHECK-NEXT: vand.vx v9, v8, a0
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vand.vx v8, v8, a0
; CHECK-NEXT: vadd.vv v8, v9, v8
; CHECK-NEXT: vsrl.vi v9, v8, 4
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: vand.vi v8, v8, 15
; CHECK-NEXT: ret
  %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}
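
; NOTE: For e8 elements the bit-slicing masks fit in small immediates
; (85 = 0x55, 51 = 0x33, and the final "vand.vi ..., 15"), and the byte sum
; needs no multiply. For e16 below, the masks are materialized as lui/addi
; pairs (0x5555, 0x3333, 0x0f0f) and the horizontal byte sum is done by
; multiplying with 257 = 0x0101 and shifting right by 8.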
3012; CHECK: # %bb.0: 3013; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 3014; CHECK-NEXT: vsrl.vi v9, v8, 1 3015; CHECK-NEXT: lui a0, 5 3016; CHECK-NEXT: vor.vv v8, v8, v9 3017; CHECK-NEXT: addi a0, a0, 1365 3018; CHECK-NEXT: vsrl.vi v9, v8, 2 3019; CHECK-NEXT: vor.vv v8, v8, v9 3020; CHECK-NEXT: vsrl.vi v9, v8, 4 3021; CHECK-NEXT: vor.vv v8, v8, v9 3022; CHECK-NEXT: vsrl.vi v9, v8, 8 3023; CHECK-NEXT: vor.vv v8, v8, v9 3024; CHECK-NEXT: vnot.v v8, v8 3025; CHECK-NEXT: vsrl.vi v9, v8, 1 3026; CHECK-NEXT: vand.vx v9, v9, a0 3027; CHECK-NEXT: lui a0, 3 3028; CHECK-NEXT: addi a0, a0, 819 3029; CHECK-NEXT: vsub.vv v8, v8, v9 3030; CHECK-NEXT: vand.vx v9, v8, a0 3031; CHECK-NEXT: vsrl.vi v8, v8, 2 3032; CHECK-NEXT: vand.vx v8, v8, a0 3033; CHECK-NEXT: lui a0, 1 3034; CHECK-NEXT: addi a0, a0, -241 3035; CHECK-NEXT: vadd.vv v8, v9, v8 3036; CHECK-NEXT: vsrl.vi v9, v8, 4 3037; CHECK-NEXT: vadd.vv v8, v8, v9 3038; CHECK-NEXT: vand.vx v8, v8, a0 3039; CHECK-NEXT: li a0, 257 3040; CHECK-NEXT: vmul.vx v8, v8, a0 3041; CHECK-NEXT: vsrl.vi v8, v8, 8 3042; CHECK-NEXT: ret 3043 %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl) 3044 ret <2 x i16> %v 3045} 3046 3047define <4 x i16> @vp_ctlz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { 3048; CHECK-LABEL: vp_ctlz_zero_undef_v4i16: 3049; CHECK: # %bb.0: 3050; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3051; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 3052; CHECK-NEXT: lui a0, 5 3053; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3054; CHECK-NEXT: addi a0, a0, 1365 3055; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 3056; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3057; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 3058; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3059; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 3060; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3061; CHECK-NEXT: vnot.v v8, v8, v0.t 3062; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 3063; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 3064; CHECK-NEXT: lui a0, 3 3065; CHECK-NEXT: addi a0, a0, 819 3066; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 3067; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 3068; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 3069; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3070; CHECK-NEXT: lui a0, 1 3071; CHECK-NEXT: addi a0, a0, -241 3072; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 3073; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 3074; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 3075; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3076; CHECK-NEXT: li a0, 257 3077; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 3078; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 3079; CHECK-NEXT: ret 3080 %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl) 3081 ret <4 x i16> %v 3082} 3083 3084define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { 3085; CHECK-LABEL: vp_ctlz_zero_undef_v4i16_unmasked: 3086; CHECK: # %bb.0: 3087; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 3088; CHECK-NEXT: vsrl.vi v9, v8, 1 3089; CHECK-NEXT: lui a0, 5 3090; CHECK-NEXT: vor.vv v8, v8, v9 3091; CHECK-NEXT: addi a0, a0, 1365 3092; CHECK-NEXT: vsrl.vi v9, v8, 2 3093; CHECK-NEXT: vor.vv v8, v8, v9 3094; CHECK-NEXT: vsrl.vi v9, v8, 4 3095; CHECK-NEXT: vor.vv v8, v8, v9 3096; CHECK-NEXT: vsrl.vi v9, v8, 8 3097; CHECK-NEXT: vor.vv v8, v8, v9 3098; CHECK-NEXT: vnot.v v8, v8 3099; CHECK-NEXT: vsrl.vi v9, v8, 1 3100; CHECK-NEXT: vand.vx v9, v9, a0 3101; CHECK-NEXT: lui a0, 3 3102; CHECK-NEXT: addi a0, a0, 819 3103; CHECK-NEXT: vsub.vv v8, v8, v9 3104; CHECK-NEXT: vand.vx v9, v8, a0 3105; CHECK-NEXT: vsrl.vi v8, v8, 2 3106; CHECK-NEXT: 
vand.vx v8, v8, a0 3107; CHECK-NEXT: lui a0, 1 3108; CHECK-NEXT: addi a0, a0, -241 3109; CHECK-NEXT: vadd.vv v8, v9, v8 3110; CHECK-NEXT: vsrl.vi v9, v8, 4 3111; CHECK-NEXT: vadd.vv v8, v8, v9 3112; CHECK-NEXT: vand.vx v8, v8, a0 3113; CHECK-NEXT: li a0, 257 3114; CHECK-NEXT: vmul.vx v8, v8, a0 3115; CHECK-NEXT: vsrl.vi v8, v8, 8 3116; CHECK-NEXT: ret 3117 %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl) 3118 ret <4 x i16> %v 3119} 3120 3121define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { 3122; CHECK-LABEL: vp_ctlz_zero_undef_v8i16: 3123; CHECK: # %bb.0: 3124; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3125; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 3126; CHECK-NEXT: lui a0, 5 3127; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3128; CHECK-NEXT: addi a0, a0, 1365 3129; CHECK-NEXT: vsrl.vi v9, v8, 2, v0.t 3130; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3131; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 3132; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3133; CHECK-NEXT: vsrl.vi v9, v8, 8, v0.t 3134; CHECK-NEXT: vor.vv v8, v8, v9, v0.t 3135; CHECK-NEXT: vnot.v v8, v8, v0.t 3136; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 3137; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 3138; CHECK-NEXT: lui a0, 3 3139; CHECK-NEXT: addi a0, a0, 819 3140; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 3141; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 3142; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 3143; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3144; CHECK-NEXT: lui a0, 1 3145; CHECK-NEXT: addi a0, a0, -241 3146; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 3147; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 3148; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 3149; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 3150; CHECK-NEXT: li a0, 257 3151; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 3152; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 3153; CHECK-NEXT: ret 3154 %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl) 3155 ret <8 x i16> %v 3156} 3157 3158define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { 3159; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked: 3160; CHECK: # %bb.0: 3161; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 3162; CHECK-NEXT: vsrl.vi v9, v8, 1 3163; CHECK-NEXT: lui a0, 5 3164; CHECK-NEXT: vor.vv v8, v8, v9 3165; CHECK-NEXT: addi a0, a0, 1365 3166; CHECK-NEXT: vsrl.vi v9, v8, 2 3167; CHECK-NEXT: vor.vv v8, v8, v9 3168; CHECK-NEXT: vsrl.vi v9, v8, 4 3169; CHECK-NEXT: vor.vv v8, v8, v9 3170; CHECK-NEXT: vsrl.vi v9, v8, 8 3171; CHECK-NEXT: vor.vv v8, v8, v9 3172; CHECK-NEXT: vnot.v v8, v8 3173; CHECK-NEXT: vsrl.vi v9, v8, 1 3174; CHECK-NEXT: vand.vx v9, v9, a0 3175; CHECK-NEXT: lui a0, 3 3176; CHECK-NEXT: addi a0, a0, 819 3177; CHECK-NEXT: vsub.vv v8, v8, v9 3178; CHECK-NEXT: vand.vx v9, v8, a0 3179; CHECK-NEXT: vsrl.vi v8, v8, 2 3180; CHECK-NEXT: vand.vx v8, v8, a0 3181; CHECK-NEXT: lui a0, 1 3182; CHECK-NEXT: addi a0, a0, -241 3183; CHECK-NEXT: vadd.vv v8, v9, v8 3184; CHECK-NEXT: vsrl.vi v9, v8, 4 3185; CHECK-NEXT: vadd.vv v8, v8, v9 3186; CHECK-NEXT: vand.vx v8, v8, a0 3187; CHECK-NEXT: li a0, 257 3188; CHECK-NEXT: vmul.vx v8, v8, a0 3189; CHECK-NEXT: vsrl.vi v8, v8, 8 3190; CHECK-NEXT: ret 3191 %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl) 3192 ret <8 x i16> %v 3193} 3194 3195define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { 3196; CHECK-LABEL: vp_ctlz_zero_undef_v16i16: 3197; CHECK: # %bb.0: 3198; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 3199; CHECK-NEXT: 
define <8 x i16> @vp_ctlz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

define <16 x i16> @vp_ctlz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 8, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 5
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 3
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 1
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    li a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    ret
  %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

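; For e32 elements the popcount constants widen to full 32-bit masks:
; lui 349525 + addi 1365 = 0x55555555, lui 209715 + addi 819 = 0x33333333,
; lui 61681 + addi -241 = 0x0f0f0f0f and lui 4112 + addi 257 = 0x01010101;
; the smear ladder gains a shift by 16, and the final vsrl.vi by 24 extracts
; the byte-summed count from the top byte.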
define <2 x i32> @vp_ctlz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v2i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

define <4 x i32> @vp_ctlz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v9, v9, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v9, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vsrl.vi v9, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v9, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v4i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v9, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

define <8 x i32> @vp_ctlz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v10, v10, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v10, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8, v0.t
; CHECK-NEXT:    vsrl.vi v10, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v10, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v8i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vsrl.vi v10, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v10
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

define <16 x i32> @vp_ctlz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 8, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 16, v0.t
; CHECK-NEXT:    vor.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vnot.v v8, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 1, v0.t
; CHECK-NEXT:    vand.vx v12, v12, a0, v0.t
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v12, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 2, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8, v0.t
; CHECK-NEXT:    vsrl.vi v12, v8, 4, v0.t
; CHECK-NEXT:    vadd.vv v8, v8, v12, v0.t
; CHECK-NEXT:    vand.vx v8, v8, a0, v0.t
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0, v0.t
; CHECK-NEXT:    vsrl.vi v8, v8, 24, v0.t
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ctlz_zero_undef_v16i32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    lui a0, 349525
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    addi a0, a0, 1365
; CHECK-NEXT:    vsrl.vi v12, v8, 2
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 8
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vsrl.vi v12, v8, 16
; CHECK-NEXT:    vor.vv v8, v8, v12
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 1
; CHECK-NEXT:    vand.vx v12, v12, a0
; CHECK-NEXT:    lui a0, 209715
; CHECK-NEXT:    addi a0, a0, 819
; CHECK-NEXT:    vsub.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v12, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 61681
; CHECK-NEXT:    addi a0, a0, -241
; CHECK-NEXT:    vadd.vv v8, v12, v8
; CHECK-NEXT:    vsrl.vi v12, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v12
; CHECK-NEXT:    vand.vx v8, v8, a0
; CHECK-NEXT:    lui a0, 4112
; CHECK-NEXT:    addi a0, a0, 257
; CHECK-NEXT:    vmul.vx v8, v8, a0
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    ret
  %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

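; For e64 elements the smear ladder also needs a shift by 32, done with
; vsrl.vx on a scalar (li 32) since vsrl.vi immediates only reach 31. RV64
; builds the 64-bit masks in scalar registers (addiw, then slli/add to
; replicate the low 32 bits into the high half) and keeps using the .vx
; forms; RV32 has no 64-bit GPRs, so each mask is splatted with vmv.v.x
; under a temporary e32 vsetivli and the .vv forms are used instead. The
; count lands in the top byte, hence the final vsrl.vx by 56.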
define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v9, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV32-NEXT:    vand.vv v9, v8, v10, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vsrl.vx v9, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v9, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v9
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v9, v8, 1
; RV32-NEXT:    vand.vv v9, v9, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v2i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v9, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vsrl.vx v9, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v9
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a0
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
; RV32-NEXT:    vnot.v v8, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV32-NEXT:    vand.vv v10, v8, v12, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vsrl.vx v10, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v10, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vi v10, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vsrl.vx v10, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v10
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v10, v10, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v4i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vi v10, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vsrl.vx v10, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v10
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a0
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vnot.v v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    vand.vv v12, v8, v12, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v8, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v12, v16, v12, v0.t
; RV32-NEXT:    vand.vv v16, v12, v8, v0.t
; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
; RV32-NEXT:    vand.vv v8, v12, v8, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vsrl.vx v12, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v12, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vi v12, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vsrl.vx v12, v8, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vor.vv v8, v8, v12
; RV32-NEXT:    vnot.v v8, v8
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v12, v12, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v12
; RV32-NEXT:    vand.vv v12, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vadd.vv v8, v12, v8
; RV32-NEXT:    vsrl.vi v12, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v12
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v8i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vi v12, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vsrl.vx v12, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v12
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v12, v8, 1
; RV64-NEXT:    vand.vx v12, v12, a0
; RV64-NEXT:    vsub.vv v8, v8, v12
; RV64-NEXT:    vand.vx v12, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

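; From here the i64 cases run at LMUL=8, where too few vector registers
; remain to keep every splatted 64-bit mask live. The RV32 lowerings
; therefore store each constant's 32-bit halves to the stack (paired sw),
; reload them as splats with strided vlse64.v, and shuttle whole register
; groups through vlenb-scaled spill slots (vs8r.v/vl8r.v, the
; "Unknown-size Folded Spill/Reload" slots).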
define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v15i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
  ret <15 x i64> %v
}

define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x i64> %v
}

define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v16i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 32(sp)
; RV32-NEXT:    sw a1, 36(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    addi a1, sp, 40
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT:    addi a1, sp, 32
; RV32-NEXT:    vor.vv v16, v8, v16, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    addi a1, sp, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vnot.v v16, v16, v0.t
; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
; RV32-NEXT:    addi a2, sp, 48
; RV32-NEXT:    vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT:    vand.vv v24, v8, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v24, v16, v24, v0.t
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v8, v24, 4, v0.t
; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 48
; RV32-NEXT:    addi sp, sp, 48
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v16i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vsrl.vx v16, v8, a4, v0.t
; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
; RV64-NEXT:    vnot.v v8, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v16, v8, a1, v0.t
; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vmul.vx v8, v8, a3, v0.t
; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    sw a1, 16(sp)
; RV32-NEXT:    sw a1, 20(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v0, v8, 1
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    sw a1, 0(sp)
; RV32-NEXT:    sw a1, 4(sp)
; RV32-NEXT:    addi a1, sp, 24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vlse64.v v16, (a1), zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 2
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 4
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vi v0, v8, 16
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vor.vv v8, v8, v0
; RV32-NEXT:    vnot.v v0, v8
; RV32-NEXT:    vsrl.vi v8, v0, 1
; RV32-NEXT:    vand.vv v24, v8, v24
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v8, (a1), zero
; RV32-NEXT:    mv a1, sp
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v24, v0, v24
; RV32-NEXT:    vand.vv v0, v24, v16
; RV32-NEXT:    vsrl.vi v24, v24, 2
; RV32-NEXT:    vand.vv v16, v24, v16
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vlse64.v v24, (a1), zero
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v16, v0, v16
; RV32-NEXT:    vsrl.vi v0, v16, 4
; RV32-NEXT:    vadd.vv v16, v16, v0
; RV32-NEXT:    vand.vv v8, v16, v8
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    li a4, 32
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 2
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 8
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vi v16, v8, 16
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vsrl.vx v16, v8, a4
; RV64-NEXT:    vor.vv v8, v8, v16
; RV64-NEXT:    vnot.v v8, v8
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
  %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v32i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -48
; RV32-NEXT:    .cfi_def_cfa_offset 48
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 56
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    sub sp, sp, a1
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 48
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v0, 2
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    lui a2, 209715
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    sw a1, 40(sp)
; RV32-NEXT:    sw a1, 44(sp)
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a2, a2, 819
; RV32-NEXT:    sw a2, 32(sp)
; RV32-NEXT:    sw a2, 36(sp)
; RV32-NEXT:    lui a2, 4112
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    sw a1, 24(sp)
; RV32-NEXT:    sw a1, 28(sp)
; RV32-NEXT:    li a1, 16
; RV32-NEXT:    addi a2, a2, 257
; RV32-NEXT:    sw a2, 16(sp)
; RV32-NEXT:    sw a2, 20(sp)
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:    bltu a0, a1, .LBB70_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:  .LBB70_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 56
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v24, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a1, 16
; RV32-NEXT: addi a2, a2, 257
; RV32-NEXT: sw a2, 16(sp)
; RV32-NEXT: sw a2, 20(sp)
; RV32-NEXT: mv a2, a0
; RV32-NEXT: bltu a0, a1, .LBB70_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a2, 16
; RV32-NEXT: .LBB70_2:
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: li a1, 32
; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: addi a4, sp, 32
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a5, 40
; RV32-NEXT: mul a3, a3, a5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vnot.v v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 48
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 48
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, sp, 24
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a5, 24
; RV32-NEXT: mul a3, a3, a5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a4), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a0, -16
; RV32-NEXT: sltu a0, a0, a3
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: vmv1r.v v0, v24
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 56
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 16
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB70_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB70_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: li a1, 32
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: lui a3, 209715
; RV64-NEXT: lui a4, 61681
; RV64-NEXT: lui a5, 4112
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: addiw a3, a3, 819
; RV64-NEXT: addiw a6, a4, -241
; RV64-NEXT: addiw a7, a5, 257
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a5, a2, a5
; RV64-NEXT: slli a4, a3, 32
; RV64-NEXT: add a4, a3, a4
; RV64-NEXT: slli a2, a6, 32
; RV64-NEXT: add a2, a6, a2
; RV64-NEXT: slli a3, a7, 32
; RV64-NEXT: add a3, a7, a3
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a6, a0, a6
; RV64-NEXT: li a0, 56
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a4, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a4, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t
; RV64-NEXT: addi a7, sp, 16
; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a7, vlenb
; RV64-NEXT: slli a7, a7, 3
; RV64-NEXT: add a7, sp, a7
; RV64-NEXT: addi a7, a7, 16
; RV64-NEXT: vl8r.v v8, (a7) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vor.vv v16, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t
; RV64-NEXT: vor.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV64-NEXT: vor.vv v8, v8, v16, v0.t
; RV64-NEXT: vnot.v v8, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a4, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a4, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a2, v0.t
; RV64-NEXT: vmul.vx v8, v8, a3, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

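; Unmasked <32 x i64> variant: the same two-half split as above, with vsetvli
; switching between the low- and high-half AVLs so the two expansions are
; interleaved.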
define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: addi a1, a2, 257
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a3, .LBB71_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB71_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: li a2, 32
; RV32-NEXT: addi a3, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a3), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; RV32-NEXT: addi a3, a0, -16
; RV32-NEXT: sltu a0, a0, a3
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a3
; RV32-NEXT: addi a3, sp, 32
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 1
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 2
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 16
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 4
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v8, a2
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 8
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vnot.v v0, v8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v16, 16
; RV32-NEXT: vor.vv v16, v16, v8
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v8, v0, 1
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v8, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v24, v0, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v0, v16, a2
; RV32-NEXT: vor.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v0, v24, v8
; RV32-NEXT: vsrl.vi v24, v24, 2
; RV32-NEXT: vand.vv v24, v24, v8
; RV32-NEXT: vadd.vv v24, v0, v24
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vnot.v v16, v16
; RV32-NEXT: vsrl.vi v0, v16, 1
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v0, v0, v24
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsub.vv v0, v16, v0
; RV32-NEXT: addi a4, sp, 48
; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v24, 4
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v0, v8
; RV32-NEXT: vsrl.vi v0, v0, 2
; RV32-NEXT: vand.vv v8, v0, v8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v8, v24, v8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a3), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v16
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v16, v16, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v24, v8, v24
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v8, v16, a2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v24, a2
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 16
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB71_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB71_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: li a2, 32
; RV64-NEXT: lui a3, 349525
; RV64-NEXT: lui a4, 209715
; RV64-NEXT: lui a5, 61681
; RV64-NEXT: lui a6, 4112
; RV64-NEXT: addiw a7, a3, 1365
; RV64-NEXT: addiw a3, a4, 819
; RV64-NEXT: addiw a4, a5, -241
; RV64-NEXT: addiw a6, a6, 257
; RV64-NEXT: slli a5, a7, 32
; RV64-NEXT: add a7, a7, a5
; RV64-NEXT: slli a5, a3, 32
; RV64-NEXT: add a5, a3, a5
; RV64-NEXT: slli a3, a4, 32
; RV64-NEXT: add a3, a4, a3
; RV64-NEXT: slli a4, a6, 32
; RV64-NEXT: add a4, a6, a4
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a6, a0, a6
; RV64-NEXT: li a0, 56
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 2
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 8
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 2
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 16
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v24, v8, a2
; RV64-NEXT: vor.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 8
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vnot.v v8, v8
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 16
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: vand.vx v24, v24, a7
; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v24, v16, a2
; RV64-NEXT: vor.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v8, a5
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vnot.v v16, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v8, v8, a5
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vand.vx v24, v24, a7
; RV64-NEXT: vsub.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v16, a5
; RV64-NEXT: vsrl.vi v16, v16, 2
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v16, a5
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vadd.vv v16, v16, v24
; RV64-NEXT: vand.vx v16, v16, a3
; RV64-NEXT: vmul.vx v16, v16, a4
; RV64-NEXT: vsrl.vx v16, v16, a0
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}