; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

; The RVI configurations have no vector FP and no Zvbb, so ctlz is expanded
; generically: an or/shift cascade smears the leading set bit into every
; lower bit, the result is inverted, and the set bits are counted with the
; usual SWAR popcount constants (0x55..., 0x33..., 0x0f...). RVF/RVD instead
; convert the input to float with round-towards-zero and recover the count
; from the biased exponent, clamping with vminu so a zero element still
; yields the element width. Zvbb lowers directly to vclz.v.
define void @ctlz_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    vrsub.vx v8, v10, a1
; RVF-NEXT:    li a1, 8
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    vrsub.vx v8, v10, a1
; RVD-NEXT:    li a1, 8
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)

define void @ctlz_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)

define void @ctlz_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vx v9, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v9, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vwsubu.vv v10, v9, v8
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT:    vminu.vx v8, v10, a1
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)

define void @ctlz_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    vrsub.vx v8, v12, a1
; RVF-NEXT:    li a1, 8
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    vrsub.vx v8, v12, a1
; RVD-NEXT:    li a1, 8
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 false)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)

define void @ctlz_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 16
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 16
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 false)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)

define void @ctlz_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vminu.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)

define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vx v10, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v10, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v11, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v11, 23
; RVF-NEXT:    vwsubu.vv v12, v10, v8
; RVF-NEXT:    li a1, 64
; RVF-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT:    vminu.vx v8, v12, a1
; RVF-NEXT:    vse64.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    li a1, 64
; RVD-NEXT:    vminu.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)

; The ctlz_zero_undef_* variants below pass i1 true, so the lowering may
; assume the input is never zero and the FP-based expansions drop the final
; vminu clamp that would otherwise fix up ctlz(0).
define void @ctlz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v16i8:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v16i8:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vzext.vf2 v10, v8
; RVF-NEXT:    vfwcvt.f.xu.v v12, v10
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT:    vnsrl.wi v10, v8, 0
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vrsub.vx v8, v10, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v16i8:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vzext.vf2 v10, v8
; RVD-NEXT:    vfwcvt.f.xu.v v12, v10
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT:    vnsrl.wi v10, v8, 0
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vrsub.vx v8, v10, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v8i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v8i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vfwcvt.f.xu.v v10, v8
; RVF-NEXT:    vnsrl.wi v8, v10, 23
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v8i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wi v8, v10, 23
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v4i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vsrl.vi v9, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v9
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v9, v8, 1
; RVI-NEXT:    vand.vx v9, v9, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v9
; RVI-NEXT:    vand.vx v9, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v9, v8
; RVI-NEXT:    vsrl.vi v9, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v9
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v4i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v4i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v10, v8
; RVD-NEXT:    vnsrl.wx v8, v10, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
  store <4 x i32> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_zero_undef_v2i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vi v10, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vsrl.vx v10, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v10
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 1
; RV32I-NEXT:    vand.vv v9, v10, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v9
; RV32I-NEXT:    vand.vv v9, v8, v10
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vadd.vv v8, v9, v8
; RV32I-NEXT:    vsrl.vi v9, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v9
; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT:    vmv.v.x v9, a1
; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v10
; RV32I-NEXT:    vmul.vv v8, v8, v9
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_v2i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vi v9, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vsrl.vx v9, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v9
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 1
; RV64I-NEXT:    vand.vx v9, v9, a1
; RV64I-NEXT:    vsub.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v9, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v9, v8
; RV64I-NEXT:    vsrl.vi v9, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v9
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v2i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v9, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v10, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v10, 23
; RVF-NEXT:    vwsubu.vv v10, v9, v8
; RVF-NEXT:    vse64.v v10, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v2i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v32i8:
; RVI:       # %bb.0:
; RVI-NEXT:    li a1, 32
; RVI-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT:    vle8.v v8, (a0)
; RVI-NEXT:    li a1, 85
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    li a1, 51
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vi v8, v8, 15
; RVI-NEXT:    vse8.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v32i8:
; RVF:       # %bb.0:
; RVF-NEXT:    li a1, 32
; RVF-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVF-NEXT:    vle8.v v8, (a0)
; RVF-NEXT:    vzext.vf2 v12, v8
; RVF-NEXT:    vfwcvt.f.xu.v v16, v12
; RVF-NEXT:    vnsrl.wi v8, v16, 23
; RVF-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT:    vnsrl.wi v12, v8, 0
; RVF-NEXT:    li a1, 134
; RVF-NEXT:    vrsub.vx v8, v12, a1
; RVF-NEXT:    vse8.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v32i8:
; RVD:       # %bb.0:
; RVD-NEXT:    li a1, 32
; RVD-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RVD-NEXT:    vle8.v v8, (a0)
; RVD-NEXT:    vzext.vf2 v12, v8
; RVD-NEXT:    vfwcvt.f.xu.v v16, v12
; RVD-NEXT:    vnsrl.wi v8, v16, 23
; RVD-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT:    vnsrl.wi v12, v8, 0
; RVD-NEXT:    li a1, 134
; RVD-NEXT:    vrsub.vx v8, v12, a1
; RVD-NEXT:    vse8.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 true)
  store <32 x i8> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v16i16:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT:    vle16.v v8, (a0)
; RVI-NEXT:    lui a1, 5
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 3
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 1
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    li a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 8
; RVI-NEXT:    vse16.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v16i16:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT:    vle16.v v8, (a0)
; RVF-NEXT:    vfwcvt.f.xu.v v12, v8
; RVF-NEXT:    vnsrl.wi v8, v12, 23
; RVF-NEXT:    li a1, 142
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse16.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v16i16:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT:    vle16.v v8, (a0)
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wi v8, v12, 23
; RVD-NEXT:    li a1, 142
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse16.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 true)
  store <16 x i16> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: ctlz_zero_undef_v8i32:
; RVI:       # %bb.0:
; RVI-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT:    vle32.v v8, (a0)
; RVI-NEXT:    lui a1, 349525
; RVI-NEXT:    addi a1, a1, 1365
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 2
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 8
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vsrl.vi v10, v8, 16
; RVI-NEXT:    vor.vv v8, v8, v10
; RVI-NEXT:    vnot.v v8, v8
; RVI-NEXT:    vsrl.vi v10, v8, 1
; RVI-NEXT:    vand.vx v10, v10, a1
; RVI-NEXT:    lui a1, 209715
; RVI-NEXT:    addi a1, a1, 819
; RVI-NEXT:    vsub.vv v8, v8, v10
; RVI-NEXT:    vand.vx v10, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 2
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 61681
; RVI-NEXT:    addi a1, a1, -241
; RVI-NEXT:    vadd.vv v8, v10, v8
; RVI-NEXT:    vsrl.vi v10, v8, 4
; RVI-NEXT:    vadd.vv v8, v8, v10
; RVI-NEXT:    vand.vx v8, v8, a1
; RVI-NEXT:    lui a1, 4112
; RVI-NEXT:    addi a1, a1, 257
; RVI-NEXT:    vmul.vx v8, v8, a1
; RVI-NEXT:    vsrl.vi v8, v8, 24
; RVI-NEXT:    vse32.v v8, (a0)
; RVI-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v8i32:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT:    vle32.v v8, (a0)
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfcvt.f.xu.v v8, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v8, 23
; RVF-NEXT:    li a1, 158
; RVF-NEXT:    vrsub.vx v8, v8, a1
; RVF-NEXT:    vse32.v v8, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v8i32:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT:    vle32.v v8, (a0)
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vfwcvt.f.xu.v v12, v8
; RVD-NEXT:    vnsrl.wx v8, v12, a1
; RVD-NEXT:    li a1, 1054
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse32.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 true)
  store <8 x i32> %c, ptr %x
  ret void
}

define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: ctlz_zero_undef_v4i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vle64.v v8, (a0)
; RV32I-NEXT:    lui a1, 349525
; RV32I-NEXT:    addi a1, a1, 1365
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    li a1, 32
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 2
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 4
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 8
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vi v12, v8, 16
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vsrl.vx v12, v8, a1
; RV32I-NEXT:    lui a1, 209715
; RV32I-NEXT:    addi a1, a1, 819
; RV32I-NEXT:    vor.vv v8, v8, v12
; RV32I-NEXT:    vnot.v v8, v8
; RV32I-NEXT:    vsrl.vi v12, v8, 1
; RV32I-NEXT:    vand.vv v10, v12, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 61681
; RV32I-NEXT:    addi a1, a1, -241
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vsub.vv v8, v8, v10
; RV32I-NEXT:    vand.vv v10, v8, v12
; RV32I-NEXT:    vsrl.vi v8, v8, 2
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v12, a1
; RV32I-NEXT:    lui a1, 4112
; RV32I-NEXT:    addi a1, a1, 257
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vadd.vv v8, v10, v8
; RV32I-NEXT:    vsrl.vi v10, v8, 4
; RV32I-NEXT:    vadd.vv v8, v8, v10
; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT:    vmv.v.x v10, a1
; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT:    vand.vv v8, v8, v12
; RV32I-NEXT:    vmul.vv v8, v8, v10
; RV32I-NEXT:    li a1, 56
; RV32I-NEXT:    vsrl.vx v8, v8, a1
; RV32I-NEXT:    vse64.v v8, (a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ctlz_zero_undef_v4i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT:    vle64.v v8, (a0)
; RV64I-NEXT:    lui a1, 349525
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    lui a4, 4112
; RV64I-NEXT:    addiw a1, a1, 1365
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    addiw a4, a4, 257
; RV64I-NEXT:    slli a5, a1, 32
; RV64I-NEXT:    add a1, a1, a5
; RV64I-NEXT:    slli a5, a2, 32
; RV64I-NEXT:    add a2, a2, a5
; RV64I-NEXT:    slli a5, a3, 32
; RV64I-NEXT:    add a3, a3, a5
; RV64I-NEXT:    slli a5, a4, 32
; RV64I-NEXT:    add a4, a4, a5
; RV64I-NEXT:    li a5, 32
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 2
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 8
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vi v10, v8, 16
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vsrl.vx v10, v8, a5
; RV64I-NEXT:    vor.vv v8, v8, v10
; RV64I-NEXT:    vnot.v v8, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 1
; RV64I-NEXT:    vand.vx v10, v10, a1
; RV64I-NEXT:    vsub.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v10, v8, a2
; RV64I-NEXT:    vsrl.vi v8, v8, 2
; RV64I-NEXT:    vand.vx v8, v8, a2
; RV64I-NEXT:    vadd.vv v8, v10, v8
; RV64I-NEXT:    vsrl.vi v10, v8, 4
; RV64I-NEXT:    vadd.vv v8, v8, v10
; RV64I-NEXT:    vand.vx v8, v8, a3
; RV64I-NEXT:    vmul.vx v8, v8, a4
; RV64I-NEXT:    li a1, 56
; RV64I-NEXT:    vsrl.vx v8, v8, a1
; RV64I-NEXT:    vse64.v v8, (a0)
; RV64I-NEXT:    ret
;
; RVF-LABEL: ctlz_zero_undef_v4i64:
; RVF:       # %bb.0:
; RVF-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT:    vle64.v v8, (a0)
; RVF-NEXT:    li a1, 190
; RVF-NEXT:    vmv.v.x v10, a1
; RVF-NEXT:    fsrmi a1, 1
; RVF-NEXT:    vfncvt.f.xu.w v11, v8
; RVF-NEXT:    fsrm a1
; RVF-NEXT:    vsrl.vi v8, v11, 23
; RVF-NEXT:    vwsubu.vv v12, v10, v8
; RVF-NEXT:    vse64.v v12, (a0)
; RVF-NEXT:    ret
;
; RVD-LABEL: ctlz_zero_undef_v4i64:
; RVD:       # %bb.0:
; RVD-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT:    vle64.v v8, (a0)
; RVD-NEXT:    fsrmi a1, 1
; RVD-NEXT:    vfcvt.f.xu.v v8, v8
; RVD-NEXT:    fsrm a1
; RVD-NEXT:    li a1, 52
; RVD-NEXT:    vsrl.vx v8, v8, a1
; RVD-NEXT:    li a1, 1086
; RVD-NEXT:    vrsub.vx v8, v8, a1
; RVD-NEXT:    vse64.v v8, (a0)
; RVD-NEXT:    ret
;
; ZVBB-LABEL: ctlz_zero_undef_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vclz.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 true)
  store <4 x i64> %c, ptr %x
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32D: {{.*}}
; RV32F: {{.*}}
; RV64D: {{.*}}
; RV64F: {{.*}}