; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVBB

define void @ctpop_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, 85
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    li a1, 51
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  store <16 x i8> %c, ptr %x
  ret void
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

define void @ctpop_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    li a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  store <8 x i16> %c, ptr %x
  ret void
}
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)

define void @ctpop_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 349525
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vsrl.vi v9, v8, 1
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    lui a1, 209715
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v9, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 61681
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vadd.vv v8, v9, v8
; CHECK-NEXT:    vsrl.vi v9, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 4112
; CHECK-NEXT:    addi a1, a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  store <4 x i32> %c, ptr %x
  ret void
}
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)

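; For i64 elements the expansion below diverges between RV32 and RV64: the
; RV32 checks build each 64-bit SWAR mask by splatting its 32-bit half with
; vmv.v.x at e32 and toggling vsetivli back to e64, while the RV64 checks
; materialize the full 64-bit constants in scalar registers via
; lui/addiw/slli/add before using the .vx forms.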
define void @ctpop_v2i64(ptr %x, ptr %y) {
; RV32-LABEL: ctpop_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vi v10, v8, 1
; RV32-NEXT:    vand.vv v9, v10, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vand.vv v9, v8, v10
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vsrl.vi v9, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v10
; RV32-NEXT:    vmul.vv v8, v8, v9
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsrl.vi v9, v8, 1
; RV64-NEXT:    vand.vx v9, v9, a1
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vand.vx v9, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vsrl.vi v9, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    vsrl.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v2i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  store <2 x i64> %c, ptr %x
  ret void
}
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

define void @ctpop_v32i8(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, 85
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a1
; CHECK-NEXT:    li a1, 51
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vi v8, v8, 15
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v32i8:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    li a1, 32
; ZVBB-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; ZVBB-NEXT:    vle8.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse8.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
  store <32 x i8> %c, ptr %x
  ret void
}
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)

define void @ctpop_v16i16(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a1
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    li a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v16i16:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; ZVBB-NEXT:    vle16.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse16.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
  store <16 x i16> %c, ptr %x
  ret void
}
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)

define void @ctpop_v8i32(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 349525
; CHECK-NEXT:    addi a1, a1, 1365
; CHECK-NEXT:    vsrl.vi v10, v8, 1
; CHECK-NEXT:    vand.vx v10, v10, a1
; CHECK-NEXT:    lui a1, 209715
; CHECK-NEXT:    addi a1, a1, 819
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v10, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 61681
; CHECK-NEXT:    addi a1, a1, -241
; CHECK-NEXT:    vadd.vv v8, v10, v8
; CHECK-NEXT:    vsrl.vi v10, v8, 4
; CHECK-NEXT:    vadd.vv v8, v8, v10
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    lui a1, 4112
; CHECK-NEXT:    addi a1, a1, 257
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 24
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse32.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  store <8 x i32> %c, ptr %x
  ret void
}
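; The ctpop-with-compare tests below check that comparing a popcount against
; a small constant folds away the full expansion: ult 2 / ugt 1 become tests
; on (x & (x - 1)), and eq 1 / ne 1 become unsigned compares between (x - 1)
; and (x ^ (x - 1)). With Zvbb the popcount stays a single vcpop.v followed by
; the corresponding vector mask compare.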
define <8 x i1> @ctpop_v8i32_ult_two(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i32_ult_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ult <8 x i32> %c, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ugt_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i32_ugt_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ugt <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_eq_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i32_eq_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v10
; CHECK-NEXT:    vmsltu.vv v0, v10, v8
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp eq <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
define <8 x i1> @ctpop_v8i32_ne_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v8i32_ne_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v10
; CHECK-NEXT:    vmsleu.vv v0, v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v8i32_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; ZVBB-NEXT:    vle32.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  %cmp = icmp ne <8 x i32> %c, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i1> %cmp
}
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)

define void @ctpop_v4i64(ptr %x, ptr %y) {
; RV32-LABEL: ctpop_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a1, a1, 1365
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    lui a1, 209715
; RV32-NEXT:    addi a1, a1, 819
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vi v12, v8, 1
; RV32-NEXT:    vand.vv v10, v12, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 61681
; RV32-NEXT:    addi a1, a1, -241
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v10
; RV32-NEXT:    vand.vv v10, v8, v12
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v12, a1
; RV32-NEXT:    lui a1, 4112
; RV32-NEXT:    addi a1, a1, 257
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v10, v8
; RV32-NEXT:    vsrl.vi v10, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v10
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vmv.v.x v10, a1
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v12
; RV32-NEXT:    vmul.vv v8, v8, v10
; RV32-NEXT:    li a1, 56
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    lui a2, 209715
; RV64-NEXT:    lui a3, 61681
; RV64-NEXT:    lui a4, 4112
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    addiw a2, a2, 819
; RV64-NEXT:    addiw a3, a3, -241
; RV64-NEXT:    addiw a4, a4, 257
; RV64-NEXT:    slli a5, a1, 32
; RV64-NEXT:    add a1, a1, a5
; RV64-NEXT:    slli a5, a2, 32
; RV64-NEXT:    add a2, a2, a5
; RV64-NEXT:    slli a5, a3, 32
; RV64-NEXT:    add a3, a3, a5
; RV64-NEXT:    slli a5, a4, 32
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    vsrl.vi v10, v8, 1
; RV64-NEXT:    vand.vx v10, v10, a1
; RV64-NEXT:    vsub.vv v8, v8, v10
; RV64-NEXT:    vand.vx v10, v8, a2
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vadd.vv v8, v10, v8
; RV64-NEXT:    vsrl.vi v10, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v10
; RV64-NEXT:    vand.vx v8, v8, a3
; RV64-NEXT:    vmul.vx v8, v8, a4
; RV64-NEXT:    li a1, 56
; RV64-NEXT:    vsrl.vx v8, v8, a1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vse64.v v8, (a0)
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  store <4 x i64> %c, ptr %x
  ret void
}
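; The same compare folds are checked for <4 x i64> elements below; the shared
; CHECK lines stay free of the long RV32/RV64 mask materialization used above.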
define <4 x i1> @ctpop_v4i64_ult_two(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i64_ult_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ult_two:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ult <4 x i64> %c, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ugt_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i64_ugt_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v10
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ugt_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ugt <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_eq_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i64_eq_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v10
; CHECK-NEXT:    vmsltu.vv v0, v10, v8
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_eq_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmseq.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp eq <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
define <4 x i1> @ctpop_v4i64_ne_one(ptr %x, ptr %y) {
; CHECK-LABEL: ctpop_v4i64_ne_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v10, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v10
; CHECK-NEXT:    vmsleu.vv v0, v8, v10
; CHECK-NEXT:    ret
;
; ZVBB-LABEL: ctpop_v4i64_ne_one:
; ZVBB:       # %bb.0:
; ZVBB-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; ZVBB-NEXT:    vle64.v v8, (a0)
; ZVBB-NEXT:    vcpop.v v8, v8
; ZVBB-NEXT:    vmsne.vi v0, v8, 1
; ZVBB-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  %cmp = icmp ne <4 x i64> %c, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i1> %cmp
}
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)