; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV32I
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVI,RV64I
; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+zvl128b,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVF,RV64F
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV32D
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RVD,RV64D
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

define void @cttz_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i8:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT: vle8.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: li a1, 85
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: li a1, 51
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vi v8, v8, 15
; RVI-NEXT: vse8.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v16i8:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v9, v8, v9
; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT: vzext.vf2 v10, v9
; RVF-NEXT: vfwcvt.f.xu.v v12, v10
; RVF-NEXT: vnsrl.wi v10, v12, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT: vnsrl.wi v9, v10, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsub.vx v8, v9, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v16i8:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v9, v8, v9
; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT: vzext.vf2 v10, v9
; RVD-NEXT: vfwcvt.f.xu.v v12, v10
; RVD-NEXT: vnsrl.wi v10, v12, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT: vnsrl.wi v9, v10, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v9, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v16i8:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT: vle8.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse8.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

define void @cttz_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i16:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma
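; Without Zvbb, the RVI runs expand cttz as popcount(~x & (x - 1)): the
; masks 0x5555, 0x3333 and 0x0f0f below sum adjacent 1-, 2- and 4-bit
; fields. For example, x = 0b0110_1000 gives ~x & (x - 1) = 0b0000_0111,
; whose population count 3 is cttz(x).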
; RVI-NEXT: vle16.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: lui a1, 5
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: lui a1, 3
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 1
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: li a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 8
; RVI-NEXT: vse16.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v8i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vfwcvt.f.xu.v v10, v8
; RVF-NEXT: vnsrl.wi v8, v10, 23
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v8i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vfwcvt.f.xu.v v10, v8
; RVD-NEXT: vnsrl.wi v8, v10, 23
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)

define void @cttz_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v4i32:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT: vle32.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: lui a1, 349525
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: lui a1, 209715
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 61681
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 4112
; RVI-NEXT: addi a1, a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 24
; RVI-NEXT: vse32.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v4i32:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT: vle32.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v9, v8, v9
; RVF-NEXT: vfcvt.f.xu.v v9, v9
; RVF-NEXT: fsrm a1
; RVF-NEXT: li a1, 127
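; v9 holds x & -x, the lowest set bit of x in isolation. Converting it
; to f32 and shifting out the 23 mantissa bits leaves cttz(x) plus the
; exponent bias, so a1 = 127 is subtracted afterwards; e.g. x = 8
; converts to 2^3 with exponent field 130, and 130 - 127 = 3. Zero
; lanes, recorded in v0 below, are patched to the element width 32 by
; the final vmerge.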
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsrl.vi v8, v9, 23
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: li a1, 32
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse32.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v4i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v9, v8, v9
; RVD-NEXT: vfwcvt.f.xu.v v10, v9
; RVD-NEXT: vnsrl.wx v9, v10, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v9, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)

define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v2i64:
; RV32I: # %bb.0:
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vle64.v v8, (a0)
; RV32I-NEXT: lui a1, 349525
; RV32I-NEXT: addi a1, a1, 1365
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v9, a1
; RV32I-NEXT: li a1, 1
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vsub.vx v10, v8, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsrl.vi v10, v8, 1
; RV32I-NEXT: vand.vv v9, v10, v9
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: vand.vv v9, v8, v10
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v9, a1
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vmul.vv v8, v8, v9
; RV32I-NEXT: li a1, 56
; RV32I-NEXT: vsrl.vx v8, v8, a1
; RV32I-NEXT: vse64.v v8, (a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_v2i64:
; RV64I: # %bb.0:
; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT: vle64.v v8, (a0)
; RV64I-NEXT: lui a1, 349525
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: lui a3, 61681
; RV64I-NEXT: lui a4, 4112
; RV64I-NEXT: addiw a1, a1, 1365
; RV64I-NEXT: addiw a2, a2, 819
; RV64I-NEXT: addiw a3, a3, -241
; RV64I-NEXT: addiw a4, a4, 257
; RV64I-NEXT: slli a5, a1, 32
; RV64I-NEXT: add a1, a1, a5
; RV64I-NEXT: slli a5, a2, 32
; RV64I-NEXT: add a2, a2, a5
; RV64I-NEXT: slli a5, a3, 32
; RV64I-NEXT: add a3, a3, a5
; RV64I-NEXT: slli a5, a4, 32
; RV64I-NEXT: add a4, a4, a5
; RV64I-NEXT: li a5, 1
; RV64I-NEXT: vsub.vx v9, v8, a5
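; v9 = x - 1; the vnot and vand below form ~x & (x - 1), turning the
; trailing zeros of x into a suffix of ones. The 64-bit SWAR constants
; built in a1-a4 above then count those ones, and the multiply by
; 0x0101010101010101 plus the shift by 56 collect the per-byte counts
; into the top byte.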
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: vand.vx v9, v9, a1
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: vand.vx v9, v8, a2
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a2
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: vand.vx v8, v8, a3
; RV64I-NEXT: vmul.vx v8, v8, a4
; RV64I-NEXT: li a1, 56
; RV64I-NEXT: vsrl.vx v8, v8, a1
; RV64I-NEXT: vse64.v v8, (a0)
; RV64I-NEXT: ret
;
; RVF-LABEL: cttz_v2i64:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT: vle64.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v9, v8, v9
; RVF-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT: vfncvt.f.xu.w v10, v9
; RVF-NEXT: fsrm a1
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT: vsrl.vi v8, v10, 23
; RVF-NEXT: vwsubu.vx v9, v8, a1
; RVF-NEXT: li a1, 64
; RVF-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RVF-NEXT: vmerge.vxm v8, v9, a1, v0
; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v2i64:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT: vle64.v v8, (a0)
; RVD-NEXT: fsrmi a1, 1
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v9, v8, v9
; RVD-NEXT: vfcvt.f.xu.v v9, v9
; RVD-NEXT: fsrm a1
; RVD-NEXT: li a1, 52
; RVD-NEXT: vsrl.vx v9, v9, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v9, a1
; RVD-NEXT: li a1, 64
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse64.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

define void @cttz_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v32i8:
; RVI: # %bb.0:
; RVI-NEXT: li a1, 32
; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT: vle8.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: li a1, 85
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: li a1, 51
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vi v8, v8, 15
; RVI-NEXT: vse8.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v32i8:
; RVF: # %bb.0:
; RVF-NEXT: li a1, 32
; RVF-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v10, v8, v10
; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT: vzext.vf2 v12, v10
; RVF-NEXT: vfwcvt.f.xu.v v16, v12
; RVF-NEXT: vnsrl.wi v12, v16, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
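; For i8 elements the exponent extraction needs two narrowing steps:
; the vnsrl above brought the f32 exponent fields down to e16, and the
; vnsrl below truncates them to e8 before the 127 bias is subtracted.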
; RVF-NEXT: vnsrl.wi v10, v12, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsub.vx v8, v10, a1
; RVF-NEXT: vmerge.vim v8, v8, 8, v0
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v32i8:
; RVD: # %bb.0:
; RVD-NEXT: li a1, 32
; RVD-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v10, v8, v10
; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT: vzext.vf2 v12, v10
; RVD-NEXT: vfwcvt.f.xu.v v16, v12
; RVD-NEXT: vnsrl.wi v12, v16, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT: vnsrl.wi v10, v12, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: vmerge.vim v8, v8, 8, v0
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v32i8:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a1, 32
; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT: vle8.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse8.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 false)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)

define void @cttz_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v16i16:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT: vle16.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: lui a1, 5
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: lui a1, 3
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 1
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: li a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 8
; RVI-NEXT: vse16.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v16i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: li a1, 127
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vfwcvt.f.xu.v v12, v8
; RVF-NEXT: vnsrl.wi v8, v12, 23
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: li a1, 16
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v16i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: li a1, 127
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vfwcvt.f.xu.v v12, v8
; RVD-NEXT: vnsrl.wi v8, v12, 23
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: li a1, 16
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i16>, ptr %x
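; %b below is never used, so only the value loaded from %x affects the
; output checked above.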
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 false)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)

define void @cttz_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_v8i32:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT: vle32.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: lui a1, 349525
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: lui a1, 209715
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 61681
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 4112
; RVI-NEXT: addi a1, a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 24
; RVI-NEXT: vse32.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_v8i32:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT: vle32.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v10, v8, v10
; RVF-NEXT: vfcvt.f.xu.v v10, v10
; RVF-NEXT: fsrm a1
; RVF-NEXT: li a1, 127
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsrl.vi v8, v10, 23
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: li a1, 32
; RVF-NEXT: vmerge.vxm v8, v8, a1, v0
; RVF-NEXT: vse32.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v8i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v10, v8, v10
; RVD-NEXT: vfwcvt.f.xu.v v12, v10
; RVD-NEXT: vnsrl.wx v10, v12, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: li a1, 32
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 false)
  store <8 x i32> %c, ptr %x
  ret void
}
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)

define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_v4i64:
; RV32I: # %bb.0:
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vle64.v v8, (a0)
; RV32I-NEXT: lui a1, 349525
; RV32I-NEXT: addi a1, a1, 1365
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: li a1, 1
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vsub.vx v12, v8, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsrl.vi v12, v8, 1
; RV32I-NEXT: vand.vv v10, v12, v10
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v12, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
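; On rv32 there is no 64-bit scalar to splat, so each 64-bit constant
; is broadcast as its repeating 32-bit half at e32 and the vector is
; then used at e64, which is why this block keeps toggling vsetivli
; between the two element widths.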
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vsub.vv v8, v8, v10
; RV32I-NEXT: vand.vv v10, v8, v12
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v12, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vadd.vv v8, v10, v8
; RV32I-NEXT: vsrl.vi v10, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v10
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vmul.vv v8, v8, v10
; RV32I-NEXT: li a1, 56
; RV32I-NEXT: vsrl.vx v8, v8, a1
; RV32I-NEXT: vse64.v v8, (a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_v4i64:
; RV64I: # %bb.0:
; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT: vle64.v v8, (a0)
; RV64I-NEXT: lui a1, 349525
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: lui a3, 61681
; RV64I-NEXT: lui a4, 4112
; RV64I-NEXT: addiw a1, a1, 1365
; RV64I-NEXT: addiw a2, a2, 819
; RV64I-NEXT: addiw a3, a3, -241
; RV64I-NEXT: addiw a4, a4, 257
; RV64I-NEXT: slli a5, a1, 32
; RV64I-NEXT: add a1, a1, a5
; RV64I-NEXT: slli a5, a2, 32
; RV64I-NEXT: add a2, a2, a5
; RV64I-NEXT: slli a5, a3, 32
; RV64I-NEXT: add a3, a3, a5
; RV64I-NEXT: slli a5, a4, 32
; RV64I-NEXT: add a4, a4, a5
; RV64I-NEXT: li a5, 1
; RV64I-NEXT: vsub.vx v10, v8, a5
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v10
; RV64I-NEXT: vsrl.vi v10, v8, 1
; RV64I-NEXT: vand.vx v10, v10, a1
; RV64I-NEXT: vsub.vv v8, v8, v10
; RV64I-NEXT: vand.vx v10, v8, a2
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a2
; RV64I-NEXT: vadd.vv v8, v10, v8
; RV64I-NEXT: vsrl.vi v10, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v10
; RV64I-NEXT: vand.vx v8, v8, a3
; RV64I-NEXT: vmul.vx v8, v8, a4
; RV64I-NEXT: li a1, 56
; RV64I-NEXT: vsrl.vx v8, v8, a1
; RV64I-NEXT: vse64.v v8, (a0)
; RV64I-NEXT: ret
;
; RVF-LABEL: cttz_v4i64:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT: vle64.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v10, v8, v10
; RVF-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT: vfncvt.f.xu.w v12, v10
; RVF-NEXT: fsrm a1
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT: vmseq.vi v0, v8, 0
; RVF-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT: vsrl.vi v8, v12, 23
; RVF-NEXT: vwsubu.vx v10, v8, a1
; RVF-NEXT: li a1, 64
; RVF-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RVF-NEXT: vmerge.vxm v8, v10, a1, v0
; RVF-NEXT: vse64.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_v4i64:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT: vle64.v v8, (a0)
; RVD-NEXT: fsrmi a1, 1
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v10, v8, v10
; RVD-NEXT: vfcvt.f.xu.v v10, v10
; RVD-NEXT: fsrm a1
; RVD-NEXT: li a1, 52
; RVD-NEXT: vsrl.vx v10, v10, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vmseq.vi v0, v8, 0
; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: li a1, 64
; RVD-NEXT: vmerge.vxm v8, v8, a1, v0
; RVD-NEXT: vse64.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
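; With Zvbb available the whole expansion collapses into the single
; dedicated count-trailing-zeros instruction vctz.v.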
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 false)
  store <4 x i64> %c, ptr %x
  ret void
}
declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)

define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i8:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVI-NEXT: vle8.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: li a1, 85
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: li a1, 51
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vi v8, v8, 15
; RVI-NEXT: vse8.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v16i8:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RVF-NEXT: vzext.vf2 v10, v8
; RVF-NEXT: vfwcvt.f.xu.v v12, v10
; RVF-NEXT: vnsrl.wi v8, v12, 23
; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVF-NEXT: vnsrl.wi v10, v8, 0
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v10, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v16i8:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RVD-NEXT: vzext.vf2 v10, v8
; RVD-NEXT: vfwcvt.f.xu.v v12, v10
; RVD-NEXT: vnsrl.wi v8, v12, 23
; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RVD-NEXT: vnsrl.wi v10, v8, 0
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v8, v10, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i8:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT: vle8.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse8.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  store <16 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i16:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVI-NEXT: vle16.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: lui a1, 5
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: lui a1, 3
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 1
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: li a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
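; a1 = 257 = 0x0101: the multiply adds the low byte's count into the
; high byte, and the shift by 8 below extracts that total.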
; RVI-NEXT: vsrl.vi v8, v8, 8
; RVI-NEXT: vse16.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v8i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vfwcvt.f.xu.v v10, v8
; RVF-NEXT: vnsrl.wi v8, v10, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v8i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vfwcvt.f.xu.v v10, v8
; RVD-NEXT: vnsrl.wi v8, v10, 23
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  store <8 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v4i32:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVI-NEXT: vle32.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v9, v8, a1
; RVI-NEXT: lui a1, 349525
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v9
; RVI-NEXT: vsrl.vi v9, v8, 1
; RVI-NEXT: vand.vx v9, v9, a1
; RVI-NEXT: lui a1, 209715
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v9
; RVI-NEXT: vand.vx v9, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 61681
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v9, v8
; RVI-NEXT: vsrl.vi v9, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v9
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 4112
; RVI-NEXT: addi a1, a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 24
; RVI-NEXT: vse32.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v4i32:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVF-NEXT: vle32.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vfcvt.f.xu.v v8, v8
; RVF-NEXT: fsrm a1
; RVF-NEXT: vsrl.vi v8, v8, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse32.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v4i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vfwcvt.f.xu.v v10, v8
; RVD-NEXT: vnsrl.wx v8, v10, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
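; The i1 true argument makes a zero input poison, which is what lets
; the lowerings above drop the vmseq/vmerge zero fix-up.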
  store <4 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v2i64:
; RV32I: # %bb.0:
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vle64.v v8, (a0)
; RV32I-NEXT: lui a1, 349525
; RV32I-NEXT: addi a1, a1, 1365
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v9, a1
; RV32I-NEXT: li a1, 1
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vsub.vx v10, v8, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsrl.vi v10, v8, 1
; RV32I-NEXT: vand.vv v9, v10, v9
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vsub.vv v8, v8, v9
; RV32I-NEXT: vand.vv v9, v8, v10
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vadd.vv v8, v9, v8
; RV32I-NEXT: vsrl.vi v9, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v9
; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32I-NEXT: vmv.v.x v9, a1
; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32I-NEXT: vand.vv v8, v8, v10
; RV32I-NEXT: vmul.vv v8, v8, v9
; RV32I-NEXT: li a1, 56
; RV32I-NEXT: vsrl.vx v8, v8, a1
; RV32I-NEXT: vse64.v v8, (a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_zero_undef_v2i64:
; RV64I: # %bb.0:
; RV64I-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64I-NEXT: vle64.v v8, (a0)
; RV64I-NEXT: lui a1, 349525
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: lui a3, 61681
; RV64I-NEXT: lui a4, 4112
; RV64I-NEXT: addiw a1, a1, 1365
; RV64I-NEXT: addiw a2, a2, 819
; RV64I-NEXT: addiw a3, a3, -241
; RV64I-NEXT: addiw a4, a4, 257
; RV64I-NEXT: slli a5, a1, 32
; RV64I-NEXT: add a1, a1, a5
; RV64I-NEXT: slli a5, a2, 32
; RV64I-NEXT: add a2, a2, a5
; RV64I-NEXT: slli a5, a3, 32
; RV64I-NEXT: add a3, a3, a5
; RV64I-NEXT: slli a5, a4, 32
; RV64I-NEXT: add a4, a4, a5
; RV64I-NEXT: li a5, 1
; RV64I-NEXT: vsub.vx v9, v8, a5
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v9
; RV64I-NEXT: vsrl.vi v9, v8, 1
; RV64I-NEXT: vand.vx v9, v9, a1
; RV64I-NEXT: vsub.vv v8, v8, v9
; RV64I-NEXT: vand.vx v9, v8, a2
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a2
; RV64I-NEXT: vadd.vv v8, v9, v8
; RV64I-NEXT: vsrl.vi v9, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v9
; RV64I-NEXT: vand.vx v8, v8, a3
; RV64I-NEXT: vmul.vx v8, v8, a4
; RV64I-NEXT: li a1, 56
; RV64I-NEXT: vsrl.vx v8, v8, a1
; RV64I-NEXT: vse64.v v8, (a0)
; RV64I-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v2i64:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVF-NEXT: vle64.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v9, v8, 0
; RVF-NEXT: vand.vv v8, v8, v9
; RVF-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RVF-NEXT: vfncvt.f.xu.w v9, v8
; RVF-NEXT: fsrm a1
; RVF-NEXT: vsrl.vi v8, v9, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vwsubu.vx v9, v8, a1
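; vwsubu.vx subtracts the 127 bias while widening from e32 back to
; e64, so the result is produced in the stored element width in one
; instruction.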
; RVF-NEXT: vse64.v v9, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v2i64:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVD-NEXT: vle64.v v8, (a0)
; RVD-NEXT: fsrmi a1, 1
; RVD-NEXT: vrsub.vi v9, v8, 0
; RVD-NEXT: vand.vv v8, v8, v9
; RVD-NEXT: vfcvt.f.xu.v v8, v8
; RVD-NEXT: fsrm a1
; RVD-NEXT: li a1, 52
; RVD-NEXT: vsrl.vx v8, v8, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse64.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v2i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  store <2 x i64> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v32i8:
; RVI: # %bb.0:
; RVI-NEXT: li a1, 32
; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVI-NEXT: vle8.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: li a1, 85
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: li a1, 51
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vi v8, v8, 15
; RVI-NEXT: vse8.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v32i8:
; RVF: # %bb.0:
; RVF-NEXT: li a1, 32
; RVF-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVF-NEXT: vle8.v v8, (a0)
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RVF-NEXT: vzext.vf2 v12, v8
; RVF-NEXT: vfwcvt.f.xu.v v16, v12
; RVF-NEXT: vnsrl.wi v8, v16, 23
; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RVF-NEXT: vnsrl.wi v12, v8, 0
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v12, a1
; RVF-NEXT: vse8.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v32i8:
; RVD: # %bb.0:
; RVD-NEXT: li a1, 32
; RVD-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RVD-NEXT: vle8.v v8, (a0)
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RVD-NEXT: vzext.vf2 v12, v8
; RVD-NEXT: vfwcvt.f.xu.v v16, v12
; RVD-NEXT: vnsrl.wi v8, v16, 23
; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RVD-NEXT: vnsrl.wi v12, v8, 0
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v8, v12, a1
; RVD-NEXT: vse8.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v32i8:
; ZVBB: # %bb.0:
; ZVBB-NEXT: li a1, 32
; ZVBB-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT: vle8.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse8.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
  store <32 x i8> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v16i16:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVI-NEXT: vle16.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: lui a1, 5
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: lui a1, 3
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 1
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: li a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 8
; RVI-NEXT: vse16.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v16i16:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVF-NEXT: vle16.v v8, (a0)
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vfwcvt.f.xu.v v12, v8
; RVF-NEXT: vnsrl.wi v8, v12, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse16.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v16i16:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RVD-NEXT: vle16.v v8, (a0)
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vfwcvt.f.xu.v v12, v8
; RVD-NEXT: vnsrl.wi v8, v12, 23
; RVD-NEXT: li a1, 127
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse16.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v16i16:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT: vle16.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse16.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
  store <16 x i16> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind {
; RVI-LABEL: cttz_zero_undef_v8i32:
; RVI: # %bb.0:
; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVI-NEXT: vle32.v v8, (a0)
; RVI-NEXT: li a1, 1
; RVI-NEXT: vsub.vx v10, v8, a1
; RVI-NEXT: lui a1, 349525
; RVI-NEXT: addi a1, a1, 1365
; RVI-NEXT: vnot.v v8, v8
; RVI-NEXT: vand.vv v8, v8, v10
; RVI-NEXT: vsrl.vi v10, v8, 1
; RVI-NEXT: vand.vx v10, v10, a1
; RVI-NEXT: lui a1, 209715
; RVI-NEXT: addi a1, a1, 819
; RVI-NEXT: vsub.vv v8, v8, v10
; RVI-NEXT: vand.vx v10, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 2
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 61681
; RVI-NEXT: addi a1, a1, -241
; RVI-NEXT: vadd.vv v8, v10, v8
; RVI-NEXT: vsrl.vi v10, v8, 4
; RVI-NEXT: vadd.vv v8, v8, v10
; RVI-NEXT: vand.vx v8, v8, a1
; RVI-NEXT: lui a1, 4112
; RVI-NEXT: addi a1, a1, 257
; RVI-NEXT: vmul.vx v8, v8, a1
; RVI-NEXT: vsrl.vi v8, v8, 24
; RVI-NEXT: vse32.v v8, (a0)
; RVI-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v8i32:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVF-NEXT: vle32.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vfcvt.f.xu.v v8, v8
; RVF-NEXT: fsrm a1
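; The fsrmi/fsrm pair brackets the conversion: frm is set to 1 (round
; towards zero) for vfcvt.f.xu.v, with the previous rounding mode saved
; in a1 and restored here before the exponent field is extracted.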
; RVF-NEXT: vsrl.vi v8, v8, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vsub.vx v8, v8, a1
; RVF-NEXT: vse32.v v8, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v8i32:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVD-NEXT: vle32.v v8, (a0)
; RVD-NEXT: li a1, 52
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vfwcvt.f.xu.v v12, v8
; RVD-NEXT: vnsrl.wx v8, v12, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse32.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v8i32:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT: vle32.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse32.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
  store <8 x i32> %c, ptr %x
  ret void
}

define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
; RV32I-LABEL: cttz_zero_undef_v4i64:
; RV32I: # %bb.0:
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vle64.v v8, (a0)
; RV32I-NEXT: lui a1, 349525
; RV32I-NEXT: addi a1, a1, 1365
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: li a1, 1
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vsub.vx v12, v8, a1
; RV32I-NEXT: lui a1, 209715
; RV32I-NEXT: addi a1, a1, 819
; RV32I-NEXT: vnot.v v8, v8
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsrl.vi v12, v8, 1
; RV32I-NEXT: vand.vv v10, v12, v10
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v12, a1
; RV32I-NEXT: lui a1, 61681
; RV32I-NEXT: addi a1, a1, -241
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vsub.vv v8, v8, v10
; RV32I-NEXT: vand.vv v10, v8, v12
; RV32I-NEXT: vsrl.vi v8, v8, 2
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v12, a1
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vadd.vv v8, v10, v8
; RV32I-NEXT: vsrl.vi v10, v8, 4
; RV32I-NEXT: vadd.vv v8, v8, v10
; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32I-NEXT: vmv.v.x v10, a1
; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32I-NEXT: vand.vv v8, v8, v12
; RV32I-NEXT: vmul.vv v8, v8, v10
; RV32I-NEXT: li a1, 56
; RV32I-NEXT: vsrl.vx v8, v8, a1
; RV32I-NEXT: vse64.v v8, (a0)
; RV32I-NEXT: ret
;
; RV64I-LABEL: cttz_zero_undef_v4i64:
; RV64I: # %bb.0:
; RV64I-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64I-NEXT: vle64.v v8, (a0)
; RV64I-NEXT: lui a1, 349525
; RV64I-NEXT: lui a2, 209715
; RV64I-NEXT: lui a3, 61681
; RV64I-NEXT: lui a4, 4112
; RV64I-NEXT: addiw a1, a1, 1365
; RV64I-NEXT: addiw a2, a2, 819
; RV64I-NEXT: addiw a3, a3, -241
; RV64I-NEXT: addiw a4, a4, 257
; RV64I-NEXT: slli a5, a1, 32
; RV64I-NEXT: add a1, a1, a5
; RV64I-NEXT: slli a5, a2, 32
; RV64I-NEXT: add a2, a2, a5
; RV64I-NEXT: slli a5, a3, 32
; RV64I-NEXT: add a3, a3, a5
; RV64I-NEXT: slli a5, a4, 32
; RV64I-NEXT: add a4, a4, a5
; RV64I-NEXT: li a5, 1
; RV64I-NEXT: vsub.vx v10, v8, a5
; RV64I-NEXT: vnot.v v8, v8
; RV64I-NEXT: vand.vv v8, v8, v10
; RV64I-NEXT: vsrl.vi v10, v8, 1
; RV64I-NEXT: vand.vx v10, v10, a1
; RV64I-NEXT: vsub.vv v8, v8, v10
; RV64I-NEXT: vand.vx v10, v8, a2
; RV64I-NEXT: vsrl.vi v8, v8, 2
; RV64I-NEXT: vand.vx v8, v8, a2
; RV64I-NEXT: vadd.vv v8, v10, v8
; RV64I-NEXT: vsrl.vi v10, v8, 4
; RV64I-NEXT: vadd.vv v8, v8, v10
; RV64I-NEXT: vand.vx v8, v8, a3
; RV64I-NEXT: vmul.vx v8, v8, a4
; RV64I-NEXT: li a1, 56
; RV64I-NEXT: vsrl.vx v8, v8, a1
; RV64I-NEXT: vse64.v v8, (a0)
; RV64I-NEXT: ret
;
; RVF-LABEL: cttz_zero_undef_v4i64:
; RVF: # %bb.0:
; RVF-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RVF-NEXT: vle64.v v8, (a0)
; RVF-NEXT: fsrmi a1, 1
; RVF-NEXT: vrsub.vi v10, v8, 0
; RVF-NEXT: vand.vv v8, v8, v10
; RVF-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RVF-NEXT: vfncvt.f.xu.w v10, v8
; RVF-NEXT: fsrm a1
; RVF-NEXT: vsrl.vi v8, v10, 23
; RVF-NEXT: li a1, 127
; RVF-NEXT: vwsubu.vx v10, v8, a1
; RVF-NEXT: vse64.v v10, (a0)
; RVF-NEXT: ret
;
; RVD-LABEL: cttz_zero_undef_v4i64:
; RVD: # %bb.0:
; RVD-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RVD-NEXT: vle64.v v8, (a0)
; RVD-NEXT: fsrmi a1, 1
; RVD-NEXT: vrsub.vi v10, v8, 0
; RVD-NEXT: vand.vv v8, v8, v10
; RVD-NEXT: vfcvt.f.xu.v v8, v8
; RVD-NEXT: fsrm a1
; RVD-NEXT: li a1, 52
; RVD-NEXT: vsrl.vx v8, v8, a1
; RVD-NEXT: li a1, 1023
; RVD-NEXT: vsub.vx v8, v8, a1
; RVD-NEXT: vse64.v v8, (a0)
; RVD-NEXT: ret
;
; ZVBB-LABEL: cttz_zero_undef_v4i64:
; ZVBB: # %bb.0:
; ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT: vle64.v v8, (a0)
; ZVBB-NEXT: vctz.v v8, v8
; ZVBB-NEXT: vse64.v v8, (a0)
; ZVBB-NEXT: ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
  store <4 x i64> %c, ptr %x
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32D: {{.*}}
; RV32F: {{.*}}
; RV64D: {{.*}}
; RV64F: {{.*}}