1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb,+m -target-abi=ilp32d \ 7; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 8; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb,+m -target-abi=lp64d \ 9; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 10 11declare <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i1>, i32) 12 13define <vscale x 1 x i8> @vp_ctpop_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 14; CHECK-LABEL: vp_ctpop_nxv1i8: 15; CHECK: # %bb.0: 16; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 17; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 18; CHECK-NEXT: li a0, 85 19; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 20; CHECK-NEXT: li a0, 51 21; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 22; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 23; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 24; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 25; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 26; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 27; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 28; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 29; CHECK-NEXT: ret 30; 31; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8: 32; CHECK-ZVBB: # %bb.0: 33; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 34; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 35; CHECK-ZVBB-NEXT: ret 36 %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl) 37 ret <vscale x 1 x i8> %v 38} 39 40define <vscale x 1 x i8> @vp_ctpop_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) { 41; CHECK-LABEL: vp_ctpop_nxv1i8_unmasked: 42; CHECK: # %bb.0: 43; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 44; CHECK-NEXT: vsrl.vi v9, v8, 1 45; CHECK-NEXT: li a0, 85 46; CHECK-NEXT: vand.vx v9, v9, a0 47; CHECK-NEXT: li a0, 51 48; CHECK-NEXT: vsub.vv v8, v8, v9 49; CHECK-NEXT: vand.vx v9, v8, a0 50; CHECK-NEXT: vsrl.vi v8, v8, 2 51; CHECK-NEXT: vand.vx v8, v8, a0 52; CHECK-NEXT: vadd.vv v8, v9, v8 53; CHECK-NEXT: vsrl.vi v9, v8, 4 54; CHECK-NEXT: vadd.vv v8, v8, v9 55; CHECK-NEXT: vand.vi v8, v8, 15 56; CHECK-NEXT: ret 57; 58; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i8_unmasked: 59; CHECK-ZVBB: # %bb.0: 60; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 61; CHECK-ZVBB-NEXT: vcpop.v v8, v8 62; CHECK-ZVBB-NEXT: ret 63 %v = call <vscale x 1 x i8> @llvm.vp.ctpop.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 64 ret <vscale x 1 x i8> %v 65} 66 67declare <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32) 68 69define <vscale x 2 x i8> @vp_ctpop_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 70; CHECK-LABEL: vp_ctpop_nxv2i8: 71; CHECK: # %bb.0: 72; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 73; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 74; CHECK-NEXT: li a0, 85 75; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 76; CHECK-NEXT: li a0, 51 77; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 78; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 79; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 80; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 81; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 82; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 83; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 84; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 85; CHECK-NEXT: ret 86; 87; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8: 88; CHECK-ZVBB: # %bb.0: 89; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 90; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 91; CHECK-ZVBB-NEXT: ret 92 %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl) 93 ret <vscale x 2 x i8> %v 94} 95 96define <vscale x 2 x i8> @vp_ctpop_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) { 97; CHECK-LABEL: vp_ctpop_nxv2i8_unmasked: 98; CHECK: # %bb.0: 99; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 100; CHECK-NEXT: vsrl.vi v9, v8, 1 101; CHECK-NEXT: li a0, 85 102; CHECK-NEXT: vand.vx v9, v9, a0 103; CHECK-NEXT: li a0, 51 104; CHECK-NEXT: vsub.vv v8, v8, v9 105; CHECK-NEXT: vand.vx v9, v8, a0 106; CHECK-NEXT: vsrl.vi v8, v8, 2 107; CHECK-NEXT: vand.vx v8, v8, a0 108; CHECK-NEXT: vadd.vv v8, v9, v8 109; CHECK-NEXT: vsrl.vi v9, v8, 4 110; CHECK-NEXT: vadd.vv v8, v8, v9 111; CHECK-NEXT: vand.vi v8, v8, 15 112; CHECK-NEXT: ret 113; 114; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i8_unmasked: 115; CHECK-ZVBB: # %bb.0: 116; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 117; CHECK-ZVBB-NEXT: vcpop.v v8, v8 118; CHECK-ZVBB-NEXT: ret 119 %v = call <vscale x 2 x i8> @llvm.vp.ctpop.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 120 ret <vscale x 2 x i8> %v 121} 122 123declare <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i32) 124 125define <vscale x 4 x i8> @vp_ctpop_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 126; CHECK-LABEL: vp_ctpop_nxv4i8: 127; CHECK: # %bb.0: 128; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 129; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 130; CHECK-NEXT: li a0, 85 131; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 132; CHECK-NEXT: li a0, 51 133; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 134; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 135; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 136; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 137; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 138; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 139; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 140; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 141; CHECK-NEXT: ret 142; 143; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8: 144; CHECK-ZVBB: # %bb.0: 145; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 146; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 147; CHECK-ZVBB-NEXT: ret 148 %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl) 149 ret <vscale x 4 x i8> %v 150} 151 152define <vscale x 4 x i8> @vp_ctpop_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) { 153; CHECK-LABEL: vp_ctpop_nxv4i8_unmasked: 154; CHECK: # %bb.0: 155; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 156; CHECK-NEXT: vsrl.vi v9, v8, 1 157; CHECK-NEXT: li a0, 85 158; CHECK-NEXT: vand.vx v9, v9, a0 159; CHECK-NEXT: li a0, 51 160; CHECK-NEXT: vsub.vv v8, v8, v9 161; CHECK-NEXT: vand.vx v9, v8, a0 162; CHECK-NEXT: vsrl.vi v8, v8, 2 163; CHECK-NEXT: vand.vx v8, v8, a0 164; CHECK-NEXT: vadd.vv v8, v9, v8 165; CHECK-NEXT: vsrl.vi v9, v8, 4 166; CHECK-NEXT: vadd.vv v8, v8, v9 167; CHECK-NEXT: vand.vi v8, v8, 15 168; CHECK-NEXT: ret 169; 170; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i8_unmasked: 171; CHECK-ZVBB: # %bb.0: 172; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 173; CHECK-ZVBB-NEXT: vcpop.v v8, v8 174; CHECK-ZVBB-NEXT: ret 175 %v = call <vscale x 4 x i8> @llvm.vp.ctpop.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 176 ret <vscale x 4 x i8> %v 177} 178 179declare <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i32) 180 181define <vscale x 8 x i8> @vp_ctpop_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 182; CHECK-LABEL: vp_ctpop_nxv8i8: 183; CHECK: # %bb.0: 184; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 185; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 186; CHECK-NEXT: li a0, 85 187; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 188; CHECK-NEXT: li a0, 51 189; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 190; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 191; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 192; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 193; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 194; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 195; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 196; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 197; CHECK-NEXT: ret 198; 199; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8: 200; CHECK-ZVBB: # %bb.0: 201; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 202; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 203; CHECK-ZVBB-NEXT: ret 204 %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl) 205 ret <vscale x 8 x i8> %v 206} 207 208define <vscale x 8 x i8> @vp_ctpop_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) { 209; CHECK-LABEL: vp_ctpop_nxv8i8_unmasked: 210; CHECK: # %bb.0: 211; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 212; CHECK-NEXT: vsrl.vi v9, v8, 1 213; CHECK-NEXT: li a0, 85 214; CHECK-NEXT: vand.vx v9, v9, a0 215; CHECK-NEXT: li a0, 51 216; CHECK-NEXT: vsub.vv v8, v8, v9 217; CHECK-NEXT: vand.vx v9, v8, a0 218; CHECK-NEXT: vsrl.vi v8, v8, 2 219; CHECK-NEXT: vand.vx v8, v8, a0 220; CHECK-NEXT: vadd.vv v8, v9, v8 221; CHECK-NEXT: vsrl.vi v9, v8, 4 222; CHECK-NEXT: vadd.vv v8, v8, v9 223; CHECK-NEXT: vand.vi v8, v8, 15 224; CHECK-NEXT: ret 225; 226; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i8_unmasked: 227; CHECK-ZVBB: # %bb.0: 228; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma 229; CHECK-ZVBB-NEXT: vcpop.v v8, v8 230; CHECK-ZVBB-NEXT: ret 231 %v = call <vscale x 8 x i8> @llvm.vp.ctpop.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 232 ret <vscale x 8 x i8> %v 233} 234 235declare <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32) 236 237define <vscale x 16 x i8> @vp_ctpop_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 238; CHECK-LABEL: vp_ctpop_nxv16i8: 239; CHECK: # %bb.0: 240; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 241; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 242; CHECK-NEXT: li a0, 85 243; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 244; CHECK-NEXT: li a0, 51 245; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 246; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 247; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 248; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 249; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 250; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 251; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 252; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 253; CHECK-NEXT: ret 254; 255; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8: 256; CHECK-ZVBB: # %bb.0: 257; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 258; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 259; CHECK-ZVBB-NEXT: ret 260 %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl) 261 ret <vscale x 16 x i8> %v 262} 263 264define <vscale x 16 x i8> @vp_ctpop_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) { 265; CHECK-LABEL: vp_ctpop_nxv16i8_unmasked: 266; CHECK: # %bb.0: 267; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma 268; CHECK-NEXT: vsrl.vi v10, v8, 1 269; CHECK-NEXT: li a0, 85 270; CHECK-NEXT: vand.vx v10, v10, a0 271; CHECK-NEXT: li a0, 51 272; CHECK-NEXT: vsub.vv v8, v8, v10 273; CHECK-NEXT: vand.vx v10, v8, a0 274; CHECK-NEXT: vsrl.vi v8, v8, 2 275; CHECK-NEXT: vand.vx v8, v8, a0 276; CHECK-NEXT: vadd.vv v8, v10, v8 277; CHECK-NEXT: vsrl.vi v10, v8, 4 278; CHECK-NEXT: vadd.vv v8, v8, v10 279; CHECK-NEXT: vand.vi v8, v8, 15 280; CHECK-NEXT: ret 281; 282; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i8_unmasked: 283; CHECK-ZVBB: # %bb.0: 284; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma 285; CHECK-ZVBB-NEXT: vcpop.v v8, v8 286; CHECK-ZVBB-NEXT: ret 287 %v = call <vscale x 16 x i8> @llvm.vp.ctpop.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 288 ret <vscale x 16 x i8> %v 289} 290 291declare <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i1>, i32) 292 293define <vscale x 32 x i8> @vp_ctpop_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 294; CHECK-LABEL: vp_ctpop_nxv32i8: 295; CHECK: # %bb.0: 296; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 297; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 298; CHECK-NEXT: li a0, 85 299; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 300; CHECK-NEXT: li a0, 51 301; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 302; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 303; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 304; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 305; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 306; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 307; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 308; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 309; CHECK-NEXT: ret 310; 311; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8: 312; CHECK-ZVBB: # %bb.0: 313; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 314; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 315; CHECK-ZVBB-NEXT: ret 316 %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl) 317 ret <vscale x 32 x i8> %v 318} 319 320define <vscale x 32 x i8> @vp_ctpop_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) { 321; CHECK-LABEL: vp_ctpop_nxv32i8_unmasked: 322; CHECK: # %bb.0: 323; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma 324; CHECK-NEXT: vsrl.vi v12, v8, 1 325; CHECK-NEXT: li a0, 85 326; CHECK-NEXT: vand.vx v12, v12, a0 327; CHECK-NEXT: li a0, 51 328; CHECK-NEXT: vsub.vv v8, v8, v12 329; CHECK-NEXT: vand.vx v12, v8, a0 330; CHECK-NEXT: vsrl.vi v8, v8, 2 331; CHECK-NEXT: vand.vx v8, v8, a0 332; CHECK-NEXT: vadd.vv v8, v12, v8 333; CHECK-NEXT: vsrl.vi v12, v8, 4 334; CHECK-NEXT: vadd.vv v8, v8, v12 335; CHECK-NEXT: vand.vi v8, v8, 15 336; CHECK-NEXT: ret 337; 338; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i8_unmasked: 339; CHECK-ZVBB: # %bb.0: 340; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma 341; CHECK-ZVBB-NEXT: vcpop.v v8, v8 342; CHECK-ZVBB-NEXT: ret 343 %v = call <vscale x 32 x i8> @llvm.vp.ctpop.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 344 ret <vscale x 32 x i8> %v 345} 346 347declare <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i1>, i32) 348 349define <vscale x 64 x i8> @vp_ctpop_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) { 350; CHECK-LABEL: vp_ctpop_nxv64i8: 351; CHECK: # %bb.0: 352; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 353; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 354; CHECK-NEXT: li a0, 85 355; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 356; CHECK-NEXT: li a0, 51 357; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t 358; CHECK-NEXT: vand.vx v16, v8, a0, v0.t 359; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 360; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 361; CHECK-NEXT: vadd.vv v8, v16, v8, v0.t 362; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 363; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 364; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 365; CHECK-NEXT: ret 366; 367; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8: 368; CHECK-ZVBB: # %bb.0: 369; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 370; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 371; CHECK-ZVBB-NEXT: ret 372 %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl) 373 ret <vscale x 64 x i8> %v 374} 375 376define <vscale x 64 x i8> @vp_ctpop_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) { 377; CHECK-LABEL: vp_ctpop_nxv64i8_unmasked: 378; CHECK: # %bb.0: 379; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma 380; CHECK-NEXT: vsrl.vi v16, v8, 1 381; CHECK-NEXT: li a0, 85 382; CHECK-NEXT: vand.vx v16, v16, a0 383; CHECK-NEXT: li a0, 51 384; CHECK-NEXT: vsub.vv v8, v8, v16 385; CHECK-NEXT: vand.vx v16, v8, a0 386; CHECK-NEXT: vsrl.vi v8, v8, 2 387; CHECK-NEXT: vand.vx v8, v8, a0 388; CHECK-NEXT: vadd.vv v8, v16, v8 389; CHECK-NEXT: vsrl.vi v16, v8, 4 390; CHECK-NEXT: vadd.vv v8, v8, v16 391; CHECK-NEXT: vand.vi v8, v8, 15 392; CHECK-NEXT: ret 393; 394; CHECK-ZVBB-LABEL: vp_ctpop_nxv64i8_unmasked: 395; CHECK-ZVBB: # %bb.0: 396; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma 397; CHECK-ZVBB-NEXT: vcpop.v v8, v8 398; CHECK-ZVBB-NEXT: ret 399 %v = call <vscale x 64 x i8> @llvm.vp.ctpop.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> splat (i1 true), i32 %evl) 400 ret <vscale x 64 x i8> %v 401} 402 403declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32) 404 405define <vscale x 1 x i16> @vp_ctpop_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 406; CHECK-LABEL: vp_ctpop_nxv1i16: 407; CHECK: # %bb.0: 408; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 409; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 410; CHECK-NEXT: lui a0, 5 411; CHECK-NEXT: addi a0, a0, 1365 412; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 413; CHECK-NEXT: lui a0, 3 414; CHECK-NEXT: addi a0, a0, 819 415; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 416; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 417; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 418; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 419; CHECK-NEXT: lui a0, 1 420; CHECK-NEXT: addi a0, a0, -241 421; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 422; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 423; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 424; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 425; CHECK-NEXT: li a0, 257 426; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 427; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 428; CHECK-NEXT: ret 429; 430; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16: 431; CHECK-ZVBB: # %bb.0: 432; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 433; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 434; CHECK-ZVBB-NEXT: ret 435 %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl) 436 ret <vscale x 1 x i16> %v 437} 438 439define <vscale x 1 x i16> @vp_ctpop_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) { 440; CHECK-LABEL: vp_ctpop_nxv1i16_unmasked: 441; CHECK: # %bb.0: 442; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 443; CHECK-NEXT: vsrl.vi v9, v8, 1 444; CHECK-NEXT: lui a0, 5 445; CHECK-NEXT: addi a0, a0, 1365 446; CHECK-NEXT: vand.vx v9, v9, a0 447; CHECK-NEXT: lui a0, 3 448; CHECK-NEXT: addi a0, a0, 819 449; CHECK-NEXT: vsub.vv v8, v8, v9 450; CHECK-NEXT: vand.vx v9, v8, a0 451; CHECK-NEXT: vsrl.vi v8, v8, 2 452; CHECK-NEXT: vand.vx v8, v8, a0 453; CHECK-NEXT: lui a0, 1 454; CHECK-NEXT: addi a0, a0, -241 455; CHECK-NEXT: vadd.vv v8, v9, v8 456; CHECK-NEXT: vsrl.vi v9, v8, 4 457; CHECK-NEXT: vadd.vv v8, v8, v9 458; CHECK-NEXT: vand.vx v8, v8, a0 459; CHECK-NEXT: li a0, 257 460; CHECK-NEXT: vmul.vx v8, v8, a0 461; CHECK-NEXT: vsrl.vi v8, v8, 8 462; CHECK-NEXT: ret 463; 464; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i16_unmasked: 465; CHECK-ZVBB: # %bb.0: 466; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 467; CHECK-ZVBB-NEXT: vcpop.v v8, v8 468; CHECK-ZVBB-NEXT: ret 469 %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 470 ret <vscale x 1 x i16> %v 471} 472 473declare <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32) 474 475define <vscale x 2 x i16> @vp_ctpop_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 476; CHECK-LABEL: vp_ctpop_nxv2i16: 477; CHECK: # %bb.0: 478; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 479; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 480; CHECK-NEXT: lui a0, 5 481; CHECK-NEXT: addi a0, a0, 1365 482; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 483; CHECK-NEXT: lui a0, 3 484; CHECK-NEXT: addi a0, a0, 819 485; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 486; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 487; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 488; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 489; CHECK-NEXT: lui a0, 1 490; CHECK-NEXT: addi a0, a0, -241 491; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 492; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 493; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 494; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 495; CHECK-NEXT: li a0, 257 496; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 497; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 498; CHECK-NEXT: ret 499; 500; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16: 501; CHECK-ZVBB: # %bb.0: 502; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 503; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 504; CHECK-ZVBB-NEXT: ret 505 %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl) 506 ret <vscale x 2 x i16> %v 507} 508 509define <vscale x 2 x i16> @vp_ctpop_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) { 510; CHECK-LABEL: vp_ctpop_nxv2i16_unmasked: 511; CHECK: # %bb.0: 512; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 513; CHECK-NEXT: vsrl.vi v9, v8, 1 514; CHECK-NEXT: lui a0, 5 515; CHECK-NEXT: addi a0, a0, 1365 516; CHECK-NEXT: vand.vx v9, v9, a0 517; CHECK-NEXT: lui a0, 3 518; CHECK-NEXT: addi a0, a0, 819 519; CHECK-NEXT: vsub.vv v8, v8, v9 520; CHECK-NEXT: vand.vx v9, v8, a0 521; CHECK-NEXT: vsrl.vi v8, v8, 2 522; CHECK-NEXT: vand.vx v8, v8, a0 523; CHECK-NEXT: lui a0, 1 524; CHECK-NEXT: addi a0, a0, -241 525; CHECK-NEXT: vadd.vv v8, v9, v8 526; CHECK-NEXT: vsrl.vi v9, v8, 4 527; CHECK-NEXT: vadd.vv v8, v8, v9 528; CHECK-NEXT: vand.vx v8, v8, a0 529; CHECK-NEXT: li a0, 257 530; CHECK-NEXT: vmul.vx v8, v8, a0 531; CHECK-NEXT: vsrl.vi v8, v8, 8 532; CHECK-NEXT: ret 533; 534; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i16_unmasked: 535; CHECK-ZVBB: # %bb.0: 536; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 537; CHECK-ZVBB-NEXT: vcpop.v v8, v8 538; CHECK-ZVBB-NEXT: ret 539 %v = call <vscale x 2 x i16> @llvm.vp.ctpop.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 540 ret <vscale x 2 x i16> %v 541} 542 543declare <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32) 544 545define <vscale x 4 x i16> @vp_ctpop_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 546; CHECK-LABEL: vp_ctpop_nxv4i16: 547; CHECK: # %bb.0: 548; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 549; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 550; CHECK-NEXT: lui a0, 5 551; CHECK-NEXT: addi a0, a0, 1365 552; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 553; CHECK-NEXT: lui a0, 3 554; CHECK-NEXT: addi a0, a0, 819 555; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 556; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 557; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 558; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 559; CHECK-NEXT: lui a0, 1 560; CHECK-NEXT: addi a0, a0, -241 561; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 562; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 563; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 564; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 565; CHECK-NEXT: li a0, 257 566; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 567; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 568; CHECK-NEXT: ret 569; 570; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16: 571; CHECK-ZVBB: # %bb.0: 572; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 573; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 574; CHECK-ZVBB-NEXT: ret 575 %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl) 576 ret <vscale x 4 x i16> %v 577} 578 579define <vscale x 4 x i16> @vp_ctpop_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) { 580; CHECK-LABEL: vp_ctpop_nxv4i16_unmasked: 581; CHECK: # %bb.0: 582; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 583; CHECK-NEXT: vsrl.vi v9, v8, 1 584; CHECK-NEXT: lui a0, 5 585; CHECK-NEXT: addi a0, a0, 1365 586; CHECK-NEXT: vand.vx v9, v9, a0 587; CHECK-NEXT: lui a0, 3 588; CHECK-NEXT: addi a0, a0, 819 589; CHECK-NEXT: vsub.vv v8, v8, v9 590; CHECK-NEXT: vand.vx v9, v8, a0 591; CHECK-NEXT: vsrl.vi v8, v8, 2 592; CHECK-NEXT: vand.vx v8, v8, a0 593; CHECK-NEXT: lui a0, 1 594; CHECK-NEXT: addi a0, a0, -241 595; CHECK-NEXT: vadd.vv v8, v9, v8 596; CHECK-NEXT: vsrl.vi v9, v8, 4 597; CHECK-NEXT: vadd.vv v8, v8, v9 598; CHECK-NEXT: vand.vx v8, v8, a0 599; CHECK-NEXT: li a0, 257 600; CHECK-NEXT: vmul.vx v8, v8, a0 601; CHECK-NEXT: vsrl.vi v8, v8, 8 602; CHECK-NEXT: ret 603; 604; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i16_unmasked: 605; CHECK-ZVBB: # %bb.0: 606; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma 607; CHECK-ZVBB-NEXT: vcpop.v v8, v8 608; CHECK-ZVBB-NEXT: ret 609 %v = call <vscale x 4 x i16> @llvm.vp.ctpop.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 610 ret <vscale x 4 x i16> %v 611} 612 613declare <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32) 614 615define <vscale x 8 x i16> @vp_ctpop_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 616; CHECK-LABEL: vp_ctpop_nxv8i16: 617; CHECK: # %bb.0: 618; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 619; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 620; CHECK-NEXT: lui a0, 5 621; CHECK-NEXT: addi a0, a0, 1365 622; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 623; CHECK-NEXT: lui a0, 3 624; CHECK-NEXT: addi a0, a0, 819 625; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 626; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 627; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 628; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 629; CHECK-NEXT: lui a0, 1 630; CHECK-NEXT: addi a0, a0, -241 631; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 632; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 633; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 634; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 635; CHECK-NEXT: li a0, 257 636; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 637; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 638; CHECK-NEXT: ret 639; 640; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16: 641; CHECK-ZVBB: # %bb.0: 642; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 643; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 644; CHECK-ZVBB-NEXT: ret 645 %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl) 646 ret <vscale x 8 x i16> %v 647} 648 649define <vscale x 8 x i16> @vp_ctpop_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) { 650; CHECK-LABEL: vp_ctpop_nxv8i16_unmasked: 651; CHECK: # %bb.0: 652; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 653; CHECK-NEXT: vsrl.vi v10, v8, 1 654; CHECK-NEXT: lui a0, 5 655; CHECK-NEXT: addi a0, a0, 1365 656; CHECK-NEXT: vand.vx v10, v10, a0 657; CHECK-NEXT: lui a0, 3 658; CHECK-NEXT: addi a0, a0, 819 659; CHECK-NEXT: vsub.vv v8, v8, v10 660; CHECK-NEXT: vand.vx v10, v8, a0 661; CHECK-NEXT: vsrl.vi v8, v8, 2 662; CHECK-NEXT: vand.vx v8, v8, a0 663; CHECK-NEXT: lui a0, 1 664; CHECK-NEXT: addi a0, a0, -241 665; CHECK-NEXT: vadd.vv v8, v10, v8 666; CHECK-NEXT: vsrl.vi v10, v8, 4 667; CHECK-NEXT: vadd.vv v8, v8, v10 668; CHECK-NEXT: vand.vx v8, v8, a0 669; CHECK-NEXT: li a0, 257 670; CHECK-NEXT: vmul.vx v8, v8, a0 671; CHECK-NEXT: vsrl.vi v8, v8, 8 672; CHECK-NEXT: ret 673; 674; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i16_unmasked: 675; CHECK-ZVBB: # %bb.0: 676; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma 677; CHECK-ZVBB-NEXT: vcpop.v v8, v8 678; CHECK-ZVBB-NEXT: ret 679 %v = call <vscale x 8 x i16> @llvm.vp.ctpop.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 680 ret <vscale x 8 x i16> %v 681} 682 683declare <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i1>, i32) 684 685define <vscale x 16 x i16> @vp_ctpop_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 686; CHECK-LABEL: vp_ctpop_nxv16i16: 687; CHECK: # %bb.0: 688; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 689; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 690; CHECK-NEXT: lui a0, 5 691; CHECK-NEXT: addi a0, a0, 1365 692; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 693; CHECK-NEXT: lui a0, 3 694; CHECK-NEXT: addi a0, a0, 819 695; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 696; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 697; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 698; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 699; CHECK-NEXT: lui a0, 1 700; CHECK-NEXT: addi a0, a0, -241 701; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 702; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 703; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 704; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 705; CHECK-NEXT: li a0, 257 706; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 707; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 708; CHECK-NEXT: ret 709; 710; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16: 711; CHECK-ZVBB: # %bb.0: 712; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 713; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 714; CHECK-ZVBB-NEXT: ret 715 %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl) 716 ret <vscale x 16 x i16> %v 717} 718 719define <vscale x 16 x i16> @vp_ctpop_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) { 720; CHECK-LABEL: vp_ctpop_nxv16i16_unmasked: 721; CHECK: # %bb.0: 722; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 723; CHECK-NEXT: vsrl.vi v12, v8, 1 724; CHECK-NEXT: lui a0, 5 725; CHECK-NEXT: addi a0, a0, 1365 726; CHECK-NEXT: vand.vx v12, v12, a0 727; CHECK-NEXT: lui a0, 3 728; CHECK-NEXT: addi a0, a0, 819 729; CHECK-NEXT: vsub.vv v8, v8, v12 730; CHECK-NEXT: vand.vx v12, v8, a0 731; CHECK-NEXT: vsrl.vi v8, v8, 2 732; CHECK-NEXT: vand.vx v8, v8, a0 733; CHECK-NEXT: lui a0, 1 734; CHECK-NEXT: addi a0, a0, -241 735; CHECK-NEXT: vadd.vv v8, v12, v8 736; CHECK-NEXT: vsrl.vi v12, v8, 4 737; CHECK-NEXT: vadd.vv v8, v8, v12 738; CHECK-NEXT: vand.vx v8, v8, a0 739; CHECK-NEXT: li a0, 257 740; CHECK-NEXT: vmul.vx v8, v8, a0 741; CHECK-NEXT: vsrl.vi v8, v8, 8 742; CHECK-NEXT: ret 743; 744; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i16_unmasked: 745; CHECK-ZVBB: # %bb.0: 746; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma 747; CHECK-ZVBB-NEXT: vcpop.v v8, v8 748; CHECK-ZVBB-NEXT: ret 749 %v = call <vscale x 16 x i16> @llvm.vp.ctpop.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 750 ret <vscale x 16 x i16> %v 751} 752 753declare <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i1>, i32) 754 755define <vscale x 32 x i16> @vp_ctpop_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) { 756; CHECK-LABEL: vp_ctpop_nxv32i16: 757; CHECK: # %bb.0: 758; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 759; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 760; CHECK-NEXT: lui a0, 5 761; CHECK-NEXT: addi a0, a0, 1365 762; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 763; CHECK-NEXT: lui a0, 3 764; CHECK-NEXT: addi a0, a0, 819 765; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t 766; CHECK-NEXT: vand.vx v8, v16, a0, v0.t 767; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t 768; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 769; CHECK-NEXT: lui a0, 1 770; CHECK-NEXT: addi a0, a0, -241 771; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 772; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 773; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 774; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 775; CHECK-NEXT: li a0, 257 776; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 777; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 778; CHECK-NEXT: ret 779; 780; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16: 781; CHECK-ZVBB: # %bb.0: 782; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 783; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 784; CHECK-ZVBB-NEXT: ret 785 %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl) 786 ret <vscale x 32 x i16> %v 787} 788 789define <vscale x 32 x i16> @vp_ctpop_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) { 790; CHECK-LABEL: vp_ctpop_nxv32i16_unmasked: 791; CHECK: # %bb.0: 792; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma 793; CHECK-NEXT: vsrl.vi v16, v8, 1 794; CHECK-NEXT: lui a0, 5 795; CHECK-NEXT: addi a0, a0, 1365 796; CHECK-NEXT: vand.vx v16, v16, a0 797; CHECK-NEXT: lui a0, 3 798; CHECK-NEXT: addi a0, a0, 819 799; CHECK-NEXT: vsub.vv v8, v8, v16 800; CHECK-NEXT: vand.vx v16, v8, a0 801; CHECK-NEXT: vsrl.vi v8, v8, 2 802; CHECK-NEXT: vand.vx v8, v8, a0 803; CHECK-NEXT: lui a0, 1 804; CHECK-NEXT: addi a0, a0, -241 805; CHECK-NEXT: vadd.vv v8, v16, v8 806; CHECK-NEXT: vsrl.vi v16, v8, 4 807; CHECK-NEXT: vadd.vv v8, v8, v16 808; CHECK-NEXT: vand.vx v8, v8, a0 809; CHECK-NEXT: li a0, 257 810; CHECK-NEXT: vmul.vx v8, v8, a0 811; CHECK-NEXT: vsrl.vi v8, v8, 8 812; CHECK-NEXT: ret 813; 814; CHECK-ZVBB-LABEL: vp_ctpop_nxv32i16_unmasked: 815; CHECK-ZVBB: # %bb.0: 816; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma 817; CHECK-ZVBB-NEXT: vcpop.v v8, v8 818; CHECK-ZVBB-NEXT: ret 819 %v = call <vscale x 32 x i16> @llvm.vp.ctpop.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) 820 ret <vscale x 32 x i16> %v 821} 822 823declare <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i1>, i32) 824 825define <vscale x 1 x i32> @vp_ctpop_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 826; CHECK-LABEL: vp_ctpop_nxv1i32: 827; CHECK: # %bb.0: 828; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 829; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 830; CHECK-NEXT: lui a0, 349525 831; CHECK-NEXT: addi a0, a0, 1365 832; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 833; CHECK-NEXT: lui a0, 209715 834; CHECK-NEXT: addi a0, a0, 819 835; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 836; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 837; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 838; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 839; CHECK-NEXT: lui a0, 61681 840; CHECK-NEXT: addi a0, a0, -241 841; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 842; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 843; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 844; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 845; CHECK-NEXT: lui a0, 4112 846; CHECK-NEXT: addi a0, a0, 257 847; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 848; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 849; CHECK-NEXT: ret 850; 851; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32: 852; CHECK-ZVBB: # %bb.0: 853; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 854; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 855; CHECK-ZVBB-NEXT: ret 856 %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl) 857 ret <vscale x 1 x i32> %v 858} 859 860define <vscale x 1 x i32> @vp_ctpop_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) { 861; CHECK-LABEL: vp_ctpop_nxv1i32_unmasked: 862; CHECK: # %bb.0: 863; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 864; CHECK-NEXT: vsrl.vi v9, v8, 1 865; CHECK-NEXT: lui a0, 349525 866; CHECK-NEXT: addi a0, a0, 1365 867; CHECK-NEXT: vand.vx v9, v9, a0 868; CHECK-NEXT: lui a0, 209715 869; CHECK-NEXT: addi a0, a0, 819 870; CHECK-NEXT: vsub.vv v8, v8, v9 871; CHECK-NEXT: vand.vx v9, v8, a0 872; CHECK-NEXT: vsrl.vi v8, v8, 2 873; CHECK-NEXT: vand.vx v8, v8, a0 874; CHECK-NEXT: lui a0, 61681 875; CHECK-NEXT: addi a0, a0, -241 876; CHECK-NEXT: vadd.vv v8, v9, v8 877; CHECK-NEXT: vsrl.vi v9, v8, 4 878; CHECK-NEXT: vadd.vv v8, v8, v9 879; CHECK-NEXT: vand.vx v8, v8, a0 880; CHECK-NEXT: lui a0, 4112 881; CHECK-NEXT: addi a0, a0, 257 882; CHECK-NEXT: vmul.vx v8, v8, a0 883; CHECK-NEXT: vsrl.vi v8, v8, 24 884; CHECK-NEXT: ret 885; 886; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i32_unmasked: 887; CHECK-ZVBB: # %bb.0: 888; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 889; CHECK-ZVBB-NEXT: vcpop.v v8, v8 890; CHECK-ZVBB-NEXT: ret 891 %v = call <vscale x 1 x i32> @llvm.vp.ctpop.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 892 ret <vscale x 1 x i32> %v 893} 894 895declare <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32) 896 897define <vscale x 2 x i32> @vp_ctpop_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 898; CHECK-LABEL: vp_ctpop_nxv2i32: 899; CHECK: # %bb.0: 900; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 901; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 902; CHECK-NEXT: lui a0, 349525 903; CHECK-NEXT: addi a0, a0, 1365 904; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 905; CHECK-NEXT: lui a0, 209715 906; CHECK-NEXT: addi a0, a0, 819 907; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 908; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 909; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 910; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 911; CHECK-NEXT: lui a0, 61681 912; CHECK-NEXT: addi a0, a0, -241 913; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 914; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 915; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 916; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 917; CHECK-NEXT: lui a0, 4112 918; CHECK-NEXT: addi a0, a0, 257 919; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 920; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 921; CHECK-NEXT: ret 922; 923; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32: 924; CHECK-ZVBB: # %bb.0: 925; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 926; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 927; CHECK-ZVBB-NEXT: ret 928 %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl) 929 ret <vscale x 2 x i32> %v 930} 931 932define <vscale x 2 x i32> @vp_ctpop_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) { 933; CHECK-LABEL: vp_ctpop_nxv2i32_unmasked: 934; CHECK: # %bb.0: 935; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 936; CHECK-NEXT: vsrl.vi v9, v8, 1 937; CHECK-NEXT: lui a0, 349525 938; CHECK-NEXT: addi a0, a0, 1365 939; CHECK-NEXT: vand.vx v9, v9, a0 940; CHECK-NEXT: lui a0, 209715 941; CHECK-NEXT: addi a0, a0, 819 942; CHECK-NEXT: vsub.vv v8, v8, v9 943; CHECK-NEXT: vand.vx v9, v8, a0 944; CHECK-NEXT: vsrl.vi v8, v8, 2 945; CHECK-NEXT: vand.vx v8, v8, a0 946; CHECK-NEXT: lui a0, 61681 947; CHECK-NEXT: addi a0, a0, -241 948; CHECK-NEXT: vadd.vv v8, v9, v8 949; CHECK-NEXT: vsrl.vi v9, v8, 4 950; CHECK-NEXT: vadd.vv v8, v8, v9 951; CHECK-NEXT: vand.vx v8, v8, a0 952; CHECK-NEXT: lui a0, 4112 953; CHECK-NEXT: addi a0, a0, 257 954; CHECK-NEXT: vmul.vx v8, v8, a0 955; CHECK-NEXT: vsrl.vi v8, v8, 24 956; CHECK-NEXT: ret 957; 958; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i32_unmasked: 959; CHECK-ZVBB: # %bb.0: 960; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma 961; CHECK-ZVBB-NEXT: vcpop.v v8, v8 962; CHECK-ZVBB-NEXT: ret 963 %v = call <vscale x 2 x i32> @llvm.vp.ctpop.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 964 ret <vscale x 2 x i32> %v 965} 966 967declare <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32) 968 969define <vscale x 4 x i32> @vp_ctpop_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 970; CHECK-LABEL: vp_ctpop_nxv4i32: 971; CHECK: # %bb.0: 972; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 973; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 974; CHECK-NEXT: lui a0, 349525 975; CHECK-NEXT: addi a0, a0, 1365 976; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 977; CHECK-NEXT: lui a0, 209715 978; CHECK-NEXT: addi a0, a0, 819 979; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 980; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 981; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 982; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 983; CHECK-NEXT: lui a0, 61681 984; CHECK-NEXT: addi a0, a0, -241 985; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 986; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 987; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 988; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 989; CHECK-NEXT: lui a0, 4112 990; CHECK-NEXT: addi a0, a0, 257 991; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 992; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 993; CHECK-NEXT: ret 994; 995; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32: 996; CHECK-ZVBB: # %bb.0: 997; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 998; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 999; CHECK-ZVBB-NEXT: ret 1000 %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl) 1001 ret <vscale x 4 x i32> %v 1002} 1003 1004define <vscale x 4 x i32> @vp_ctpop_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) { 1005; CHECK-LABEL: vp_ctpop_nxv4i32_unmasked: 1006; CHECK: # %bb.0: 1007; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1008; CHECK-NEXT: vsrl.vi v10, v8, 1 1009; CHECK-NEXT: lui a0, 349525 1010; CHECK-NEXT: addi a0, a0, 1365 1011; CHECK-NEXT: vand.vx v10, v10, a0 1012; CHECK-NEXT: lui a0, 209715 1013; CHECK-NEXT: addi a0, a0, 819 1014; CHECK-NEXT: vsub.vv v8, v8, v10 1015; CHECK-NEXT: vand.vx v10, v8, a0 1016; CHECK-NEXT: vsrl.vi v8, v8, 2 1017; CHECK-NEXT: vand.vx v8, v8, a0 1018; CHECK-NEXT: lui a0, 61681 1019; CHECK-NEXT: addi a0, a0, -241 1020; CHECK-NEXT: vadd.vv v8, v10, v8 1021; CHECK-NEXT: vsrl.vi v10, v8, 4 1022; CHECK-NEXT: vadd.vv v8, v8, v10 1023; CHECK-NEXT: vand.vx v8, v8, a0 1024; CHECK-NEXT: lui a0, 4112 1025; CHECK-NEXT: addi a0, a0, 257 1026; CHECK-NEXT: vmul.vx v8, v8, a0 1027; CHECK-NEXT: vsrl.vi v8, v8, 24 1028; CHECK-NEXT: ret 1029; 1030; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i32_unmasked: 1031; CHECK-ZVBB: # %bb.0: 1032; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma 1033; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1034; CHECK-ZVBB-NEXT: ret 1035 %v = call <vscale x 4 x i32> @llvm.vp.ctpop.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1036 ret <vscale x 4 x i32> %v 1037} 1038 1039declare <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i1>, i32) 1040 1041define <vscale x 8 x i32> @vp_ctpop_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1042; CHECK-LABEL: vp_ctpop_nxv8i32: 1043; CHECK: # %bb.0: 1044; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1045; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 1046; CHECK-NEXT: lui a0, 349525 1047; CHECK-NEXT: addi a0, a0, 1365 1048; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 1049; CHECK-NEXT: lui a0, 209715 1050; CHECK-NEXT: addi a0, a0, 819 1051; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 1052; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 1053; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 1054; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1055; CHECK-NEXT: lui a0, 61681 1056; CHECK-NEXT: addi a0, a0, -241 1057; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 1058; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 1059; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 1060; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1061; CHECK-NEXT: lui a0, 4112 1062; CHECK-NEXT: addi a0, a0, 257 1063; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1064; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1065; CHECK-NEXT: ret 1066; 1067; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32: 1068; CHECK-ZVBB: # %bb.0: 1069; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1070; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1071; CHECK-ZVBB-NEXT: ret 1072 %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl) 1073 ret <vscale x 8 x i32> %v 1074} 1075 1076define <vscale x 8 x i32> @vp_ctpop_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) { 1077; CHECK-LABEL: vp_ctpop_nxv8i32_unmasked: 1078; CHECK: # %bb.0: 1079; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1080; CHECK-NEXT: vsrl.vi v12, v8, 1 1081; CHECK-NEXT: lui a0, 349525 1082; CHECK-NEXT: addi a0, a0, 1365 1083; CHECK-NEXT: vand.vx v12, v12, a0 1084; CHECK-NEXT: lui a0, 209715 1085; CHECK-NEXT: addi a0, a0, 819 1086; CHECK-NEXT: vsub.vv v8, v8, v12 1087; CHECK-NEXT: vand.vx v12, v8, a0 1088; CHECK-NEXT: vsrl.vi v8, v8, 2 1089; CHECK-NEXT: vand.vx v8, v8, a0 1090; CHECK-NEXT: lui a0, 61681 1091; CHECK-NEXT: addi a0, a0, -241 1092; CHECK-NEXT: vadd.vv v8, v12, v8 1093; CHECK-NEXT: vsrl.vi v12, v8, 4 1094; CHECK-NEXT: vadd.vv v8, v8, v12 1095; CHECK-NEXT: vand.vx v8, v8, a0 1096; CHECK-NEXT: lui a0, 4112 1097; CHECK-NEXT: addi a0, a0, 257 1098; CHECK-NEXT: vmul.vx v8, v8, a0 1099; CHECK-NEXT: vsrl.vi v8, v8, 24 1100; CHECK-NEXT: ret 1101; 1102; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i32_unmasked: 1103; CHECK-ZVBB: # %bb.0: 1104; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma 1105; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1106; CHECK-ZVBB-NEXT: ret 1107 %v = call <vscale x 8 x i32> @llvm.vp.ctpop.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 1108 ret <vscale x 8 x i32> %v 1109} 1110 1111declare <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i1>, i32) 1112 1113define <vscale x 16 x i32> @vp_ctpop_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 1114; CHECK-LABEL: vp_ctpop_nxv16i32: 1115; CHECK: # %bb.0: 1116; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1117; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t 1118; CHECK-NEXT: lui a0, 349525 1119; CHECK-NEXT: addi a0, a0, 1365 1120; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1121; CHECK-NEXT: lui a0, 209715 1122; CHECK-NEXT: addi a0, a0, 819 1123; CHECK-NEXT: vsub.vv v16, v8, v16, v0.t 1124; CHECK-NEXT: vand.vx v8, v16, a0, v0.t 1125; CHECK-NEXT: vsrl.vi v16, v16, 2, v0.t 1126; CHECK-NEXT: vand.vx v16, v16, a0, v0.t 1127; CHECK-NEXT: lui a0, 61681 1128; CHECK-NEXT: addi a0, a0, -241 1129; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 1130; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t 1131; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t 1132; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 1133; CHECK-NEXT: lui a0, 4112 1134; CHECK-NEXT: addi a0, a0, 257 1135; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 1136; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 1137; CHECK-NEXT: ret 1138; 1139; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32: 1140; CHECK-ZVBB: # %bb.0: 1141; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1142; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1143; CHECK-ZVBB-NEXT: ret 1144 %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl) 1145 ret <vscale x 16 x i32> %v 1146} 1147 1148define <vscale x 16 x i32> @vp_ctpop_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { 1149; CHECK-LABEL: vp_ctpop_nxv16i32_unmasked: 1150; CHECK: # %bb.0: 1151; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1152; CHECK-NEXT: vsrl.vi v16, v8, 1 1153; CHECK-NEXT: lui a0, 349525 1154; CHECK-NEXT: addi a0, a0, 1365 1155; CHECK-NEXT: vand.vx v16, v16, a0 1156; CHECK-NEXT: lui a0, 209715 1157; CHECK-NEXT: addi a0, a0, 819 1158; CHECK-NEXT: vsub.vv v8, v8, v16 1159; CHECK-NEXT: vand.vx v16, v8, a0 1160; CHECK-NEXT: vsrl.vi v8, v8, 2 1161; CHECK-NEXT: vand.vx v8, v8, a0 1162; CHECK-NEXT: lui a0, 61681 1163; CHECK-NEXT: addi a0, a0, -241 1164; CHECK-NEXT: vadd.vv v8, v16, v8 1165; CHECK-NEXT: vsrl.vi v16, v8, 4 1166; CHECK-NEXT: vadd.vv v8, v8, v16 1167; CHECK-NEXT: vand.vx v8, v8, a0 1168; CHECK-NEXT: lui a0, 4112 1169; CHECK-NEXT: addi a0, a0, 257 1170; CHECK-NEXT: vmul.vx v8, v8, a0 1171; CHECK-NEXT: vsrl.vi v8, v8, 24 1172; CHECK-NEXT: ret 1173; 1174; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i32_unmasked: 1175; CHECK-ZVBB: # %bb.0: 1176; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma 1177; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1178; CHECK-ZVBB-NEXT: ret 1179 %v = call <vscale x 16 x i32> @llvm.vp.ctpop.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 1180 ret <vscale x 16 x i32> %v 1181} 1182 1183declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i1>, i32) 1184 1185define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 1186; RV32-LABEL: vp_ctpop_nxv1i64: 1187; RV32: # %bb.0: 1188; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1189; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 1190; RV32-NEXT: lui a1, 349525 1191; RV32-NEXT: addi a1, a1, 1365 1192; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1193; RV32-NEXT: vmv.v.x v10, a1 1194; RV32-NEXT: lui a1, 209715 1195; RV32-NEXT: addi a1, a1, 819 1196; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1197; RV32-NEXT: vand.vv v9, v9, v10, v0.t 1198; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1199; RV32-NEXT: vmv.v.x v10, a1 1200; RV32-NEXT: lui a1, 61681 1201; RV32-NEXT: addi a1, a1, -241 1202; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1203; RV32-NEXT: vsub.vv v8, v8, v9, v0.t 1204; RV32-NEXT: vand.vv v9, v8, v10, v0.t 1205; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1206; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1207; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1208; RV32-NEXT: vmv.v.x v10, a1 1209; RV32-NEXT: lui a1, 4112 1210; RV32-NEXT: addi a1, a1, 257 1211; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1212; RV32-NEXT: vadd.vv v8, v9, v8, v0.t 1213; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t 1214; RV32-NEXT: vadd.vv v8, v8, v9, v0.t 1215; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1216; RV32-NEXT: vmv.v.x v9, a1 1217; RV32-NEXT: li a1, 56 1218; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1219; RV32-NEXT: vand.vv v8, v8, v10, v0.t 1220; RV32-NEXT: vmul.vv v8, v8, v9, v0.t 1221; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1222; RV32-NEXT: ret 1223; 1224; RV64-LABEL: vp_ctpop_nxv1i64: 1225; RV64: # %bb.0: 1226; RV64-NEXT: lui a1, 349525 1227; RV64-NEXT: lui a2, 209715 1228; RV64-NEXT: lui a3, 61681 1229; RV64-NEXT: lui a4, 4112 1230; RV64-NEXT: addiw a1, a1, 1365 1231; RV64-NEXT: addiw a2, a2, 819 1232; RV64-NEXT: addiw a3, a3, -241 1233; RV64-NEXT: addiw a4, a4, 257 1234; RV64-NEXT: slli a5, a1, 32 1235; RV64-NEXT: add a1, a1, a5 1236; RV64-NEXT: slli a5, a2, 32 1237; RV64-NEXT: add a2, a2, a5 1238; RV64-NEXT: slli a5, a3, 32 1239; RV64-NEXT: add a3, a3, a5 1240; RV64-NEXT: slli a5, a4, 32 1241; RV64-NEXT: add a4, a4, a5 1242; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1243; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 1244; RV64-NEXT: vand.vx v9, v9, a1, v0.t 1245; RV64-NEXT: vsub.vv v8, v8, v9, v0.t 1246; RV64-NEXT: vand.vx v9, v8, a2, v0.t 1247; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1248; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1249; RV64-NEXT: vadd.vv v8, v9, v8, v0.t 1250; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 1251; RV64-NEXT: vadd.vv v8, v8, v9, v0.t 1252; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1253; RV64-NEXT: li a0, 56 1254; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1255; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1256; RV64-NEXT: ret 1257; 1258; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64: 1259; CHECK-ZVBB: # %bb.0: 1260; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1261; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1262; CHECK-ZVBB-NEXT: ret 1263 %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl) 1264 ret <vscale x 1 x i64> %v 1265} 1266 1267define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) { 1268; RV32-LABEL: vp_ctpop_nxv1i64_unmasked: 1269; RV32: # %bb.0: 1270; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1271; RV32-NEXT: vsrl.vi v9, v8, 1 1272; RV32-NEXT: lui a1, 349525 1273; RV32-NEXT: addi a1, a1, 1365 1274; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1275; RV32-NEXT: vmv.v.x v10, a1 1276; RV32-NEXT: lui a1, 209715 1277; RV32-NEXT: addi a1, a1, 819 1278; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1279; RV32-NEXT: vand.vv v9, v9, v10 1280; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1281; RV32-NEXT: vmv.v.x v10, a1 1282; RV32-NEXT: lui a1, 61681 1283; RV32-NEXT: addi a1, a1, -241 1284; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1285; RV32-NEXT: vsub.vv v8, v8, v9 1286; RV32-NEXT: vand.vv v9, v8, v10 1287; RV32-NEXT: vsrl.vi v8, v8, 2 1288; RV32-NEXT: vand.vv v8, v8, v10 1289; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1290; RV32-NEXT: vmv.v.x v10, a1 1291; RV32-NEXT: lui a1, 4112 1292; RV32-NEXT: addi a1, a1, 257 1293; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1294; RV32-NEXT: vadd.vv v8, v9, v8 1295; RV32-NEXT: vsrl.vi v9, v8, 4 1296; RV32-NEXT: vadd.vv v8, v8, v9 1297; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma 1298; RV32-NEXT: vmv.v.x v9, a1 1299; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1300; RV32-NEXT: vand.vv v8, v8, v10 1301; RV32-NEXT: vmul.vv v8, v8, v9 1302; RV32-NEXT: li a0, 56 1303; RV32-NEXT: vsrl.vx v8, v8, a0 1304; RV32-NEXT: ret 1305; 1306; RV64-LABEL: vp_ctpop_nxv1i64_unmasked: 1307; RV64: # %bb.0: 1308; RV64-NEXT: lui a1, 349525 1309; RV64-NEXT: lui a2, 209715 1310; RV64-NEXT: lui a3, 61681 1311; RV64-NEXT: lui a4, 4112 1312; RV64-NEXT: addiw a1, a1, 1365 1313; RV64-NEXT: addiw a2, a2, 819 1314; RV64-NEXT: addiw a3, a3, -241 1315; RV64-NEXT: addiw a4, a4, 257 1316; RV64-NEXT: slli a5, a1, 32 1317; RV64-NEXT: add a1, a1, a5 1318; RV64-NEXT: slli a5, a2, 32 1319; RV64-NEXT: add a2, a2, a5 1320; RV64-NEXT: slli a5, a3, 32 1321; RV64-NEXT: add a3, a3, a5 1322; RV64-NEXT: slli a5, a4, 32 1323; RV64-NEXT: add a4, a4, a5 1324; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1325; RV64-NEXT: vsrl.vi v9, v8, 1 1326; RV64-NEXT: vand.vx v9, v9, a1 1327; RV64-NEXT: vsub.vv v8, v8, v9 1328; RV64-NEXT: vand.vx v9, v8, a2 1329; RV64-NEXT: vsrl.vi v8, v8, 2 1330; RV64-NEXT: vand.vx v8, v8, a2 1331; RV64-NEXT: vadd.vv v8, v9, v8 1332; RV64-NEXT: vsrl.vi v9, v8, 4 1333; RV64-NEXT: vadd.vv v8, v8, v9 1334; RV64-NEXT: vand.vx v8, v8, a3 1335; RV64-NEXT: vmul.vx v8, v8, a4 1336; RV64-NEXT: li a0, 56 1337; RV64-NEXT: vsrl.vx v8, v8, a0 1338; RV64-NEXT: ret 1339; 1340; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i64_unmasked: 1341; CHECK-ZVBB: # %bb.0: 1342; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma 1343; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1344; CHECK-ZVBB-NEXT: ret 1345 %v = call <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) 1346 ret <vscale x 1 x i64> %v 1347} 1348 1349declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32) 1350 1351define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) { 1352; RV32-LABEL: vp_ctpop_nxv2i64: 1353; RV32: # %bb.0: 1354; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1355; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 1356; RV32-NEXT: lui a1, 349525 1357; RV32-NEXT: addi a1, a1, 1365 1358; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1359; RV32-NEXT: vmv.v.x v12, a1 1360; RV32-NEXT: lui a1, 209715 1361; RV32-NEXT: addi a1, a1, 819 1362; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1363; RV32-NEXT: vand.vv v10, v10, v12, v0.t 1364; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1365; RV32-NEXT: vmv.v.x v12, a1 1366; RV32-NEXT: lui a1, 61681 1367; RV32-NEXT: addi a1, a1, -241 1368; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1369; RV32-NEXT: vsub.vv v8, v8, v10, v0.t 1370; RV32-NEXT: vand.vv v10, v8, v12, v0.t 1371; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1372; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1373; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1374; RV32-NEXT: vmv.v.x v12, a1 1375; RV32-NEXT: lui a1, 4112 1376; RV32-NEXT: addi a1, a1, 257 1377; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1378; RV32-NEXT: vadd.vv v8, v10, v8, v0.t 1379; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 1380; RV32-NEXT: vadd.vv v8, v8, v10, v0.t 1381; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1382; RV32-NEXT: vmv.v.x v10, a1 1383; RV32-NEXT: li a1, 56 1384; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1385; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1386; RV32-NEXT: vmul.vv v8, v8, v10, v0.t 1387; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1388; RV32-NEXT: ret 1389; 1390; RV64-LABEL: vp_ctpop_nxv2i64: 1391; RV64: # %bb.0: 1392; RV64-NEXT: lui a1, 349525 1393; RV64-NEXT: lui a2, 209715 1394; RV64-NEXT: lui a3, 61681 1395; RV64-NEXT: lui a4, 4112 1396; RV64-NEXT: addiw a1, a1, 1365 1397; RV64-NEXT: addiw a2, a2, 819 1398; RV64-NEXT: addiw a3, a3, -241 1399; RV64-NEXT: addiw a4, a4, 257 1400; RV64-NEXT: slli a5, a1, 32 1401; RV64-NEXT: add a1, a1, a5 1402; RV64-NEXT: slli a5, a2, 32 1403; RV64-NEXT: add a2, a2, a5 1404; RV64-NEXT: slli a5, a3, 32 1405; RV64-NEXT: add a3, a3, a5 1406; RV64-NEXT: slli a5, a4, 32 1407; RV64-NEXT: add a4, a4, a5 1408; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1409; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 1410; RV64-NEXT: vand.vx v10, v10, a1, v0.t 1411; RV64-NEXT: vsub.vv v8, v8, v10, v0.t 1412; RV64-NEXT: vand.vx v10, v8, a2, v0.t 1413; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1414; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1415; RV64-NEXT: vadd.vv v8, v10, v8, v0.t 1416; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t 1417; RV64-NEXT: vadd.vv v8, v8, v10, v0.t 1418; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1419; RV64-NEXT: li a0, 56 1420; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1421; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1422; RV64-NEXT: ret 1423; 1424; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64: 1425; CHECK-ZVBB: # %bb.0: 1426; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1427; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1428; CHECK-ZVBB-NEXT: ret 1429 %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl) 1430 ret <vscale x 2 x i64> %v 1431} 1432 1433define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) { 1434; RV32-LABEL: vp_ctpop_nxv2i64_unmasked: 1435; RV32: # %bb.0: 1436; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1437; RV32-NEXT: vsrl.vi v10, v8, 1 1438; RV32-NEXT: lui a1, 349525 1439; RV32-NEXT: addi a1, a1, 1365 1440; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1441; RV32-NEXT: vmv.v.x v12, a1 1442; RV32-NEXT: lui a1, 209715 1443; RV32-NEXT: addi a1, a1, 819 1444; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1445; RV32-NEXT: vand.vv v10, v10, v12 1446; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1447; RV32-NEXT: vmv.v.x v12, a1 1448; RV32-NEXT: lui a1, 61681 1449; RV32-NEXT: addi a1, a1, -241 1450; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1451; RV32-NEXT: vsub.vv v8, v8, v10 1452; RV32-NEXT: vand.vv v10, v8, v12 1453; RV32-NEXT: vsrl.vi v8, v8, 2 1454; RV32-NEXT: vand.vv v8, v8, v12 1455; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1456; RV32-NEXT: vmv.v.x v12, a1 1457; RV32-NEXT: lui a1, 4112 1458; RV32-NEXT: addi a1, a1, 257 1459; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1460; RV32-NEXT: vadd.vv v8, v10, v8 1461; RV32-NEXT: vsrl.vi v10, v8, 4 1462; RV32-NEXT: vadd.vv v8, v8, v10 1463; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma 1464; RV32-NEXT: vmv.v.x v10, a1 1465; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1466; RV32-NEXT: vand.vv v8, v8, v12 1467; RV32-NEXT: vmul.vv v8, v8, v10 1468; RV32-NEXT: li a0, 56 1469; RV32-NEXT: vsrl.vx v8, v8, a0 1470; RV32-NEXT: ret 1471; 1472; RV64-LABEL: vp_ctpop_nxv2i64_unmasked: 1473; RV64: # %bb.0: 1474; RV64-NEXT: lui a1, 349525 1475; RV64-NEXT: lui a2, 209715 1476; RV64-NEXT: lui a3, 61681 1477; RV64-NEXT: lui a4, 4112 1478; RV64-NEXT: addiw a1, a1, 1365 1479; RV64-NEXT: addiw a2, a2, 819 1480; RV64-NEXT: addiw a3, a3, -241 1481; RV64-NEXT: addiw a4, a4, 257 1482; RV64-NEXT: slli a5, a1, 32 1483; RV64-NEXT: add a1, a1, a5 1484; RV64-NEXT: slli a5, a2, 32 1485; RV64-NEXT: add a2, a2, a5 1486; RV64-NEXT: slli a5, a3, 32 1487; RV64-NEXT: add a3, a3, a5 1488; RV64-NEXT: slli a5, a4, 32 1489; RV64-NEXT: add a4, a4, a5 1490; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1491; RV64-NEXT: vsrl.vi v10, v8, 1 1492; RV64-NEXT: vand.vx v10, v10, a1 1493; RV64-NEXT: vsub.vv v8, v8, v10 1494; RV64-NEXT: vand.vx v10, v8, a2 1495; RV64-NEXT: vsrl.vi v8, v8, 2 1496; RV64-NEXT: vand.vx v8, v8, a2 1497; RV64-NEXT: vadd.vv v8, v10, v8 1498; RV64-NEXT: vsrl.vi v10, v8, 4 1499; RV64-NEXT: vadd.vv v8, v8, v10 1500; RV64-NEXT: vand.vx v8, v8, a3 1501; RV64-NEXT: vmul.vx v8, v8, a4 1502; RV64-NEXT: li a0, 56 1503; RV64-NEXT: vsrl.vx v8, v8, a0 1504; RV64-NEXT: ret 1505; 1506; CHECK-ZVBB-LABEL: vp_ctpop_nxv2i64_unmasked: 1507; CHECK-ZVBB: # %bb.0: 1508; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma 1509; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1510; CHECK-ZVBB-NEXT: ret 1511 %v = call <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) 1512 ret <vscale x 2 x i64> %v 1513} 1514 1515declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i1>, i32) 1516 1517define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { 1518; RV32-LABEL: vp_ctpop_nxv4i64: 1519; RV32: # %bb.0: 1520; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1521; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t 1522; RV32-NEXT: lui a1, 349525 1523; RV32-NEXT: addi a1, a1, 1365 1524; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1525; RV32-NEXT: vmv.v.x v16, a1 1526; RV32-NEXT: lui a1, 209715 1527; RV32-NEXT: addi a1, a1, 819 1528; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1529; RV32-NEXT: vand.vv v16, v12, v16, v0.t 1530; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1531; RV32-NEXT: vmv.v.x v12, a1 1532; RV32-NEXT: lui a1, 61681 1533; RV32-NEXT: addi a1, a1, -241 1534; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1535; RV32-NEXT: vsub.vv v8, v8, v16, v0.t 1536; RV32-NEXT: vand.vv v16, v8, v12, v0.t 1537; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1538; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1539; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1540; RV32-NEXT: vmv.v.x v12, a1 1541; RV32-NEXT: lui a1, 4112 1542; RV32-NEXT: addi a1, a1, 257 1543; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1544; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 1545; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 1546; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 1547; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1548; RV32-NEXT: vmv.v.x v16, a1 1549; RV32-NEXT: li a1, 56 1550; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1551; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1552; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1553; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1554; RV32-NEXT: ret 1555; 1556; RV64-LABEL: vp_ctpop_nxv4i64: 1557; RV64: # %bb.0: 1558; RV64-NEXT: lui a1, 349525 1559; RV64-NEXT: lui a2, 209715 1560; RV64-NEXT: lui a3, 61681 1561; RV64-NEXT: lui a4, 4112 1562; RV64-NEXT: addiw a1, a1, 1365 1563; RV64-NEXT: addiw a2, a2, 819 1564; RV64-NEXT: addiw a3, a3, -241 1565; RV64-NEXT: addiw a4, a4, 257 1566; RV64-NEXT: slli a5, a1, 32 1567; RV64-NEXT: add a1, a1, a5 1568; RV64-NEXT: slli a5, a2, 32 1569; RV64-NEXT: add a2, a2, a5 1570; RV64-NEXT: slli a5, a3, 32 1571; RV64-NEXT: add a3, a3, a5 1572; RV64-NEXT: slli a5, a4, 32 1573; RV64-NEXT: add a4, a4, a5 1574; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1575; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 1576; RV64-NEXT: vand.vx v12, v12, a1, v0.t 1577; RV64-NEXT: vsub.vv v8, v8, v12, v0.t 1578; RV64-NEXT: vand.vx v12, v8, a2, v0.t 1579; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1580; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1581; RV64-NEXT: vadd.vv v8, v12, v8, v0.t 1582; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 1583; RV64-NEXT: vadd.vv v8, v8, v12, v0.t 1584; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1585; RV64-NEXT: li a0, 56 1586; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1587; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1588; RV64-NEXT: ret 1589; 1590; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64: 1591; CHECK-ZVBB: # %bb.0: 1592; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1593; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1594; CHECK-ZVBB-NEXT: ret 1595 %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl) 1596 ret <vscale x 4 x i64> %v 1597} 1598 1599define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { 1600; RV32-LABEL: vp_ctpop_nxv4i64_unmasked: 1601; RV32: # %bb.0: 1602; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1603; RV32-NEXT: vsrl.vi v12, v8, 1 1604; RV32-NEXT: lui a1, 349525 1605; RV32-NEXT: addi a1, a1, 1365 1606; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1607; RV32-NEXT: vmv.v.x v16, a1 1608; RV32-NEXT: lui a1, 209715 1609; RV32-NEXT: addi a1, a1, 819 1610; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1611; RV32-NEXT: vand.vv v12, v12, v16 1612; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1613; RV32-NEXT: vmv.v.x v16, a1 1614; RV32-NEXT: lui a1, 61681 1615; RV32-NEXT: addi a1, a1, -241 1616; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1617; RV32-NEXT: vsub.vv v8, v8, v12 1618; RV32-NEXT: vand.vv v12, v8, v16 1619; RV32-NEXT: vsrl.vi v8, v8, 2 1620; RV32-NEXT: vand.vv v8, v8, v16 1621; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1622; RV32-NEXT: vmv.v.x v16, a1 1623; RV32-NEXT: lui a1, 4112 1624; RV32-NEXT: addi a1, a1, 257 1625; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1626; RV32-NEXT: vadd.vv v8, v12, v8 1627; RV32-NEXT: vsrl.vi v12, v8, 4 1628; RV32-NEXT: vadd.vv v8, v8, v12 1629; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma 1630; RV32-NEXT: vmv.v.x v12, a1 1631; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1632; RV32-NEXT: vand.vv v8, v8, v16 1633; RV32-NEXT: vmul.vv v8, v8, v12 1634; RV32-NEXT: li a0, 56 1635; RV32-NEXT: vsrl.vx v8, v8, a0 1636; RV32-NEXT: ret 1637; 1638; RV64-LABEL: vp_ctpop_nxv4i64_unmasked: 1639; RV64: # %bb.0: 1640; RV64-NEXT: lui a1, 349525 1641; RV64-NEXT: lui a2, 209715 1642; RV64-NEXT: lui a3, 61681 1643; RV64-NEXT: lui a4, 4112 1644; RV64-NEXT: addiw a1, a1, 1365 1645; RV64-NEXT: addiw a2, a2, 819 1646; RV64-NEXT: addiw a3, a3, -241 1647; RV64-NEXT: addiw a4, a4, 257 1648; RV64-NEXT: slli a5, a1, 32 1649; RV64-NEXT: add a1, a1, a5 1650; RV64-NEXT: slli a5, a2, 32 1651; RV64-NEXT: add a2, a2, a5 1652; RV64-NEXT: slli a5, a3, 32 1653; RV64-NEXT: add a3, a3, a5 1654; RV64-NEXT: slli a5, a4, 32 1655; RV64-NEXT: add a4, a4, a5 1656; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1657; RV64-NEXT: vsrl.vi v12, v8, 1 1658; RV64-NEXT: vand.vx v12, v12, a1 1659; RV64-NEXT: vsub.vv v8, v8, v12 1660; RV64-NEXT: vand.vx v12, v8, a2 1661; RV64-NEXT: vsrl.vi v8, v8, 2 1662; RV64-NEXT: vand.vx v8, v8, a2 1663; RV64-NEXT: vadd.vv v8, v12, v8 1664; RV64-NEXT: vsrl.vi v12, v8, 4 1665; RV64-NEXT: vadd.vv v8, v8, v12 1666; RV64-NEXT: vand.vx v8, v8, a3 1667; RV64-NEXT: vmul.vx v8, v8, a4 1668; RV64-NEXT: li a0, 56 1669; RV64-NEXT: vsrl.vx v8, v8, a0 1670; RV64-NEXT: ret 1671; 1672; CHECK-ZVBB-LABEL: vp_ctpop_nxv4i64_unmasked: 1673; CHECK-ZVBB: # %bb.0: 1674; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1675; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1676; CHECK-ZVBB-NEXT: ret 1677 %v = call <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) 1678 ret <vscale x 4 x i64> %v 1679} 1680 1681declare <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64>, <vscale x 7 x i1>, i32) 1682 1683define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { 1684; RV32-LABEL: vp_ctpop_nxv7i64: 1685; RV32: # %bb.0: 1686; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1687; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t 1688; RV32-NEXT: lui a1, 349525 1689; RV32-NEXT: addi a1, a1, 1365 1690; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1691; RV32-NEXT: vmv.v.x v24, a1 1692; RV32-NEXT: lui a1, 209715 1693; RV32-NEXT: addi a1, a1, 819 1694; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1695; RV32-NEXT: vand.vv v24, v16, v24, v0.t 1696; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1697; RV32-NEXT: vmv.v.x v16, a1 1698; RV32-NEXT: lui a1, 61681 1699; RV32-NEXT: addi a1, a1, -241 1700; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1701; RV32-NEXT: vsub.vv v8, v8, v24, v0.t 1702; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1703; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1704; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1705; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1706; RV32-NEXT: vmv.v.x v8, a1 1707; RV32-NEXT: lui a1, 4112 1708; RV32-NEXT: addi a1, a1, 257 1709; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1710; RV32-NEXT: vadd.vv v16, v24, v16, v0.t 1711; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t 1712; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 1713; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1714; RV32-NEXT: vmv.v.x v24, a1 1715; RV32-NEXT: li a1, 56 1716; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1717; RV32-NEXT: vand.vv v8, v16, v8, v0.t 1718; RV32-NEXT: vmul.vv v8, v8, v24, v0.t 1719; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1720; RV32-NEXT: ret 1721; 1722; RV64-LABEL: vp_ctpop_nxv7i64: 1723; RV64: # %bb.0: 1724; RV64-NEXT: lui a1, 349525 1725; RV64-NEXT: lui a2, 209715 1726; RV64-NEXT: lui a3, 61681 1727; RV64-NEXT: lui a4, 4112 1728; RV64-NEXT: addiw a1, a1, 1365 1729; RV64-NEXT: addiw a2, a2, 819 1730; RV64-NEXT: addiw a3, a3, -241 1731; RV64-NEXT: addiw a4, a4, 257 1732; RV64-NEXT: slli a5, a1, 32 1733; RV64-NEXT: add a1, a1, a5 1734; RV64-NEXT: slli a5, a2, 32 1735; RV64-NEXT: add a2, a2, a5 1736; RV64-NEXT: slli a5, a3, 32 1737; RV64-NEXT: add a3, a3, a5 1738; RV64-NEXT: slli a5, a4, 32 1739; RV64-NEXT: add a4, a4, a5 1740; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1741; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1742; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1743; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1744; RV64-NEXT: vand.vx v16, v8, a2, v0.t 1745; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1746; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1747; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1748; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1749; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1750; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1751; RV64-NEXT: li a0, 56 1752; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1753; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1754; RV64-NEXT: ret 1755; 1756; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64: 1757; CHECK-ZVBB: # %bb.0: 1758; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1759; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1760; CHECK-ZVBB-NEXT: ret 1761 %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 %evl) 1762 ret <vscale x 7 x i64> %v 1763} 1764 1765define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { 1766; RV32-LABEL: vp_ctpop_nxv7i64_unmasked: 1767; RV32: # %bb.0: 1768; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1769; RV32-NEXT: vsrl.vi v16, v8, 1 1770; RV32-NEXT: lui a1, 349525 1771; RV32-NEXT: addi a1, a1, 1365 1772; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1773; RV32-NEXT: vmv.v.x v24, a1 1774; RV32-NEXT: lui a1, 209715 1775; RV32-NEXT: addi a1, a1, 819 1776; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1777; RV32-NEXT: vand.vv v24, v16, v24 1778; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1779; RV32-NEXT: vmv.v.x v16, a1 1780; RV32-NEXT: lui a1, 61681 1781; RV32-NEXT: addi a1, a1, -241 1782; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1783; RV32-NEXT: vsub.vv v8, v8, v24 1784; RV32-NEXT: vand.vv v24, v8, v16 1785; RV32-NEXT: vsrl.vi v8, v8, 2 1786; RV32-NEXT: vand.vv v8, v8, v16 1787; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1788; RV32-NEXT: vmv.v.x v16, a1 1789; RV32-NEXT: lui a1, 4112 1790; RV32-NEXT: addi a1, a1, 257 1791; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1792; RV32-NEXT: vadd.vv v8, v24, v8 1793; RV32-NEXT: vsrl.vi v24, v8, 4 1794; RV32-NEXT: vadd.vv v8, v8, v24 1795; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1796; RV32-NEXT: vmv.v.x v24, a1 1797; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1798; RV32-NEXT: vand.vv v8, v8, v16 1799; RV32-NEXT: vmul.vv v8, v8, v24 1800; RV32-NEXT: li a0, 56 1801; RV32-NEXT: vsrl.vx v8, v8, a0 1802; RV32-NEXT: ret 1803; 1804; RV64-LABEL: vp_ctpop_nxv7i64_unmasked: 1805; RV64: # %bb.0: 1806; RV64-NEXT: lui a1, 349525 1807; RV64-NEXT: lui a2, 209715 1808; RV64-NEXT: lui a3, 61681 1809; RV64-NEXT: lui a4, 4112 1810; RV64-NEXT: addiw a1, a1, 1365 1811; RV64-NEXT: addiw a2, a2, 819 1812; RV64-NEXT: addiw a3, a3, -241 1813; RV64-NEXT: addiw a4, a4, 257 1814; RV64-NEXT: slli a5, a1, 32 1815; RV64-NEXT: add a1, a1, a5 1816; RV64-NEXT: slli a5, a2, 32 1817; RV64-NEXT: add a2, a2, a5 1818; RV64-NEXT: slli a5, a3, 32 1819; RV64-NEXT: add a3, a3, a5 1820; RV64-NEXT: slli a5, a4, 32 1821; RV64-NEXT: add a4, a4, a5 1822; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1823; RV64-NEXT: vsrl.vi v16, v8, 1 1824; RV64-NEXT: vand.vx v16, v16, a1 1825; RV64-NEXT: vsub.vv v8, v8, v16 1826; RV64-NEXT: vand.vx v16, v8, a2 1827; RV64-NEXT: vsrl.vi v8, v8, 2 1828; RV64-NEXT: vand.vx v8, v8, a2 1829; RV64-NEXT: vadd.vv v8, v16, v8 1830; RV64-NEXT: vsrl.vi v16, v8, 4 1831; RV64-NEXT: vadd.vv v8, v8, v16 1832; RV64-NEXT: vand.vx v8, v8, a3 1833; RV64-NEXT: vmul.vx v8, v8, a4 1834; RV64-NEXT: li a0, 56 1835; RV64-NEXT: vsrl.vx v8, v8, a0 1836; RV64-NEXT: ret 1837; 1838; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked: 1839; CHECK-ZVBB: # %bb.0: 1840; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1841; CHECK-ZVBB-NEXT: vcpop.v v8, v8 1842; CHECK-ZVBB-NEXT: ret 1843 %v = call <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> splat (i1 true), i32 %evl) 1844 ret <vscale x 7 x i64> %v 1845} 1846 1847declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i1>, i32) 1848 1849define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { 1850; RV32-LABEL: vp_ctpop_nxv8i64: 1851; RV32: # %bb.0: 1852; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1853; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t 1854; RV32-NEXT: lui a1, 349525 1855; RV32-NEXT: addi a1, a1, 1365 1856; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1857; RV32-NEXT: vmv.v.x v24, a1 1858; RV32-NEXT: lui a1, 209715 1859; RV32-NEXT: addi a1, a1, 819 1860; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1861; RV32-NEXT: vand.vv v24, v16, v24, v0.t 1862; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1863; RV32-NEXT: vmv.v.x v16, a1 1864; RV32-NEXT: lui a1, 61681 1865; RV32-NEXT: addi a1, a1, -241 1866; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1867; RV32-NEXT: vsub.vv v8, v8, v24, v0.t 1868; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1869; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1870; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1871; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1872; RV32-NEXT: vmv.v.x v8, a1 1873; RV32-NEXT: lui a1, 4112 1874; RV32-NEXT: addi a1, a1, 257 1875; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1876; RV32-NEXT: vadd.vv v16, v24, v16, v0.t 1877; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t 1878; RV32-NEXT: vadd.vv v16, v16, v24, v0.t 1879; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1880; RV32-NEXT: vmv.v.x v24, a1 1881; RV32-NEXT: li a1, 56 1882; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1883; RV32-NEXT: vand.vv v8, v16, v8, v0.t 1884; RV32-NEXT: vmul.vv v8, v8, v24, v0.t 1885; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1886; RV32-NEXT: ret 1887; 1888; RV64-LABEL: vp_ctpop_nxv8i64: 1889; RV64: # %bb.0: 1890; RV64-NEXT: lui a1, 349525 1891; RV64-NEXT: lui a2, 209715 1892; RV64-NEXT: lui a3, 61681 1893; RV64-NEXT: lui a4, 4112 1894; RV64-NEXT: addiw a1, a1, 1365 1895; RV64-NEXT: addiw a2, a2, 819 1896; RV64-NEXT: addiw a3, a3, -241 1897; RV64-NEXT: addiw a4, a4, 257 1898; RV64-NEXT: slli a5, a1, 32 1899; RV64-NEXT: add a1, a1, a5 1900; RV64-NEXT: slli a5, a2, 32 1901; RV64-NEXT: add a2, a2, a5 1902; RV64-NEXT: slli a5, a3, 32 1903; RV64-NEXT: add a3, a3, a5 1904; RV64-NEXT: slli a5, a4, 32 1905; RV64-NEXT: add a4, a4, a5 1906; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1907; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1908; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1909; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1910; RV64-NEXT: vand.vx v16, v8, a2, v0.t 1911; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1912; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1913; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1914; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1915; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1916; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1917; RV64-NEXT: li a0, 56 1918; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1919; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1920; RV64-NEXT: ret 1921; 1922; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64: 1923; CHECK-ZVBB: # %bb.0: 1924; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1925; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 1926; CHECK-ZVBB-NEXT: ret 1927 %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl) 1928 ret <vscale x 8 x i64> %v 1929} 1930 1931define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { 1932; RV32-LABEL: vp_ctpop_nxv8i64_unmasked: 1933; RV32: # %bb.0: 1934; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1935; RV32-NEXT: vsrl.vi v16, v8, 1 1936; RV32-NEXT: lui a1, 349525 1937; RV32-NEXT: addi a1, a1, 1365 1938; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1939; RV32-NEXT: vmv.v.x v24, a1 1940; RV32-NEXT: lui a1, 209715 1941; RV32-NEXT: addi a1, a1, 819 1942; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1943; RV32-NEXT: vand.vv v24, v16, v24 1944; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1945; RV32-NEXT: vmv.v.x v16, a1 1946; RV32-NEXT: lui a1, 61681 1947; RV32-NEXT: addi a1, a1, -241 1948; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1949; RV32-NEXT: vsub.vv v8, v8, v24 1950; RV32-NEXT: vand.vv v24, v8, v16 1951; RV32-NEXT: vsrl.vi v8, v8, 2 1952; RV32-NEXT: vand.vv v8, v8, v16 1953; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1954; RV32-NEXT: vmv.v.x v16, a1 1955; RV32-NEXT: lui a1, 4112 1956; RV32-NEXT: addi a1, a1, 257 1957; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1958; RV32-NEXT: vadd.vv v8, v24, v8 1959; RV32-NEXT: vsrl.vi v24, v8, 4 1960; RV32-NEXT: vadd.vv v8, v8, v24 1961; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma 1962; RV32-NEXT: vmv.v.x v24, a1 1963; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1964; RV32-NEXT: vand.vv v8, v8, v16 1965; RV32-NEXT: vmul.vv v8, v8, v24 1966; RV32-NEXT: li a0, 56 1967; RV32-NEXT: vsrl.vx v8, v8, a0 1968; RV32-NEXT: ret 1969; 1970; RV64-LABEL: vp_ctpop_nxv8i64_unmasked: 1971; RV64: # %bb.0: 1972; RV64-NEXT: lui a1, 349525 1973; RV64-NEXT: lui a2, 209715 1974; RV64-NEXT: lui a3, 61681 1975; RV64-NEXT: lui a4, 4112 1976; RV64-NEXT: addiw a1, a1, 1365 1977; RV64-NEXT: addiw a2, a2, 819 1978; RV64-NEXT: addiw a3, a3, -241 1979; RV64-NEXT: addiw a4, a4, 257 1980; RV64-NEXT: slli a5, a1, 32 1981; RV64-NEXT: add a1, a1, a5 1982; RV64-NEXT: slli a5, a2, 32 1983; RV64-NEXT: add a2, a2, a5 1984; RV64-NEXT: slli a5, a3, 32 1985; RV64-NEXT: add a3, a3, a5 1986; RV64-NEXT: slli a5, a4, 32 1987; RV64-NEXT: add a4, a4, a5 1988; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1989; RV64-NEXT: vsrl.vi v16, v8, 1 1990; RV64-NEXT: vand.vx v16, v16, a1 1991; RV64-NEXT: vsub.vv v8, v8, v16 1992; RV64-NEXT: vand.vx v16, v8, a2 1993; RV64-NEXT: vsrl.vi v8, v8, 2 1994; RV64-NEXT: vand.vx v8, v8, a2 1995; RV64-NEXT: vadd.vv v8, v16, v8 1996; RV64-NEXT: vsrl.vi v16, v8, 4 1997; RV64-NEXT: vadd.vv v8, v8, v16 1998; RV64-NEXT: vand.vx v8, v8, a3 1999; RV64-NEXT: vmul.vx v8, v8, a4 2000; RV64-NEXT: li a0, 56 2001; RV64-NEXT: vsrl.vx v8, v8, a0 2002; RV64-NEXT: ret 2003; 2004; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked: 2005; CHECK-ZVBB: # %bb.0: 2006; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2007; CHECK-ZVBB-NEXT: vcpop.v v8, v8 2008; CHECK-ZVBB-NEXT: ret 2009 %v = call <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) 2010 ret <vscale x 8 x i64> %v 2011} 2012 2013declare <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64>, <vscale x 16 x i1>, i32) 2014 2015define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { 2016; RV32-LABEL: vp_ctpop_nxv16i64: 2017; RV32: # %bb.0: 2018; RV32-NEXT: addi sp, sp, -16 2019; RV32-NEXT: .cfi_def_cfa_offset 16 2020; RV32-NEXT: csrr a1, vlenb 2021; RV32-NEXT: li a2, 48 2022; RV32-NEXT: mul a1, a1, a2 2023; RV32-NEXT: sub sp, sp, a1 2024; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb 2025; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2026; RV32-NEXT: vmv1r.v v7, v0 2027; RV32-NEXT: csrr a1, vlenb 2028; RV32-NEXT: li a2, 24 2029; RV32-NEXT: mul a1, a1, a2 2030; RV32-NEXT: add a1, sp, a1 2031; RV32-NEXT: addi a1, a1, 16 2032; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 2033; RV32-NEXT: csrr a1, vlenb 2034; RV32-NEXT: lui a2, 349525 2035; RV32-NEXT: srli a3, a1, 3 2036; RV32-NEXT: vslidedown.vx v0, v0, a3 2037; RV32-NEXT: sub a3, a0, a1 2038; RV32-NEXT: addi a2, a2, 1365 2039; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2040; RV32-NEXT: vmv.v.x v8, a2 2041; RV32-NEXT: csrr a2, vlenb 2042; RV32-NEXT: slli a2, a2, 5 2043; RV32-NEXT: add a2, sp, a2 2044; RV32-NEXT: addi a2, a2, 16 2045; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill 2046; RV32-NEXT: sltu a2, a0, a3 2047; RV32-NEXT: addi a2, a2, -1 2048; RV32-NEXT: and a2, a2, a3 2049; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2050; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t 2051; RV32-NEXT: csrr a3, vlenb 2052; RV32-NEXT: li a4, 40 2053; RV32-NEXT: mul a3, a3, a4 2054; RV32-NEXT: add a3, sp, a3 2055; RV32-NEXT: addi a3, a3, 16 2056; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 2057; RV32-NEXT: csrr a3, vlenb 2058; RV32-NEXT: slli a3, a3, 5 2059; RV32-NEXT: add a3, sp, a3 2060; RV32-NEXT: addi a3, a3, 16 2061; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 2062; RV32-NEXT: csrr a3, vlenb 2063; RV32-NEXT: li a4, 40 2064; RV32-NEXT: mul a3, a3, a4 2065; RV32-NEXT: add a3, sp, a3 2066; RV32-NEXT: addi a3, a3, 16 2067; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 2068; RV32-NEXT: vand.vv v8, v24, v8, v0.t 2069; RV32-NEXT: vsub.vv v16, v16, v8, v0.t 2070; RV32-NEXT: lui a3, 209715 2071; RV32-NEXT: addi a3, a3, 819 2072; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2073; RV32-NEXT: vmv.v.x v8, a3 2074; RV32-NEXT: csrr a3, vlenb 2075; RV32-NEXT: li a4, 40 2076; RV32-NEXT: mul a3, a3, a4 2077; RV32-NEXT: add a3, sp, a3 2078; RV32-NEXT: addi a3, a3, 16 2079; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 2080; RV32-NEXT: csrr a3, vlenb 2081; RV32-NEXT: li a4, 40 2082; RV32-NEXT: mul a3, a3, a4 2083; RV32-NEXT: add a3, sp, a3 2084; RV32-NEXT: addi a3, a3, 16 2085; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload 2086; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2087; RV32-NEXT: vand.vv v8, v16, v8, v0.t 2088; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t 2089; RV32-NEXT: csrr a3, vlenb 2090; RV32-NEXT: li a4, 40 2091; RV32-NEXT: mul a3, a3, a4 2092; RV32-NEXT: add a3, sp, a3 2093; RV32-NEXT: addi a3, a3, 16 2094; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload 2095; RV32-NEXT: vand.vv v16, v16, v24, v0.t 2096; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2097; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2098; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2099; RV32-NEXT: lui a3, 61681 2100; RV32-NEXT: addi a3, a3, -241 2101; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2102; RV32-NEXT: vmv.v.x v16, a3 2103; RV32-NEXT: csrr a3, vlenb 2104; RV32-NEXT: slli a3, a3, 4 2105; RV32-NEXT: add a3, sp, a3 2106; RV32-NEXT: addi a3, a3, 16 2107; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2108; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2109; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2110; RV32-NEXT: lui a3, 4112 2111; RV32-NEXT: addi a3, a3, 257 2112; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2113; RV32-NEXT: vmv.v.x v16, a3 2114; RV32-NEXT: csrr a3, vlenb 2115; RV32-NEXT: slli a3, a3, 3 2116; RV32-NEXT: add a3, sp, a3 2117; RV32-NEXT: addi a3, a3, 16 2118; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill 2119; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2120; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 2121; RV32-NEXT: li a2, 56 2122; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t 2123; RV32-NEXT: addi a3, sp, 16 2124; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 2125; RV32-NEXT: bltu a0, a1, .LBB46_2 2126; RV32-NEXT: # %bb.1: 2127; RV32-NEXT: mv a0, a1 2128; RV32-NEXT: .LBB46_2: 2129; RV32-NEXT: vmv1r.v v0, v7 2130; RV32-NEXT: li a3, 24 2131; RV32-NEXT: mul a1, a1, a3 2132; RV32-NEXT: add a1, sp, a1 2133; RV32-NEXT: addi a1, a1, 16 2134; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload 2135; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2136; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t 2137; RV32-NEXT: csrr a0, vlenb 2138; RV32-NEXT: slli a0, a0, 5 2139; RV32-NEXT: add a0, sp, a0 2140; RV32-NEXT: addi a0, a0, 16 2141; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2142; RV32-NEXT: vand.vv v8, v24, v8, v0.t 2143; RV32-NEXT: vsub.vv v8, v16, v8, v0.t 2144; RV32-NEXT: csrr a0, vlenb 2145; RV32-NEXT: slli a0, a0, 5 2146; RV32-NEXT: add a0, sp, a0 2147; RV32-NEXT: addi a0, a0, 16 2148; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill 2149; RV32-NEXT: csrr a0, vlenb 2150; RV32-NEXT: li a1, 40 2151; RV32-NEXT: mul a0, a0, a1 2152; RV32-NEXT: add a0, sp, a0 2153; RV32-NEXT: addi a0, a0, 16 2154; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2155; RV32-NEXT: csrr a0, vlenb 2156; RV32-NEXT: slli a0, a0, 5 2157; RV32-NEXT: add a0, sp, a0 2158; RV32-NEXT: addi a0, a0, 16 2159; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2160; RV32-NEXT: vand.vv v16, v16, v8, v0.t 2161; RV32-NEXT: csrr a0, vlenb 2162; RV32-NEXT: li a1, 24 2163; RV32-NEXT: mul a0, a0, a1 2164; RV32-NEXT: add a0, sp, a0 2165; RV32-NEXT: addi a0, a0, 16 2166; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill 2167; RV32-NEXT: vmv8r.v v16, v8 2168; RV32-NEXT: csrr a0, vlenb 2169; RV32-NEXT: slli a0, a0, 5 2170; RV32-NEXT: add a0, sp, a0 2171; RV32-NEXT: addi a0, a0, 16 2172; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2173; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 2174; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2175; RV32-NEXT: csrr a0, vlenb 2176; RV32-NEXT: li a1, 24 2177; RV32-NEXT: mul a0, a0, a1 2178; RV32-NEXT: add a0, sp, a0 2179; RV32-NEXT: addi a0, a0, 16 2180; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2181; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 2182; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 2183; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 2184; RV32-NEXT: csrr a0, vlenb 2185; RV32-NEXT: slli a0, a0, 4 2186; RV32-NEXT: add a0, sp, a0 2187; RV32-NEXT: addi a0, a0, 16 2188; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2189; RV32-NEXT: vand.vv v8, v8, v16, v0.t 2190; RV32-NEXT: csrr a0, vlenb 2191; RV32-NEXT: slli a0, a0, 3 2192; RV32-NEXT: add a0, sp, a0 2193; RV32-NEXT: addi a0, a0, 16 2194; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2195; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 2196; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t 2197; RV32-NEXT: addi a0, sp, 16 2198; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 2199; RV32-NEXT: csrr a0, vlenb 2200; RV32-NEXT: li a1, 48 2201; RV32-NEXT: mul a0, a0, a1 2202; RV32-NEXT: add sp, sp, a0 2203; RV32-NEXT: .cfi_def_cfa sp, 16 2204; RV32-NEXT: addi sp, sp, 16 2205; RV32-NEXT: .cfi_def_cfa_offset 0 2206; RV32-NEXT: ret 2207; 2208; RV64-LABEL: vp_ctpop_nxv16i64: 2209; RV64: # %bb.0: 2210; RV64-NEXT: addi sp, sp, -16 2211; RV64-NEXT: .cfi_def_cfa_offset 16 2212; RV64-NEXT: csrr a1, vlenb 2213; RV64-NEXT: slli a1, a1, 4 2214; RV64-NEXT: sub sp, sp, a1 2215; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb 2216; RV64-NEXT: csrr a1, vlenb 2217; RV64-NEXT: slli a1, a1, 3 2218; RV64-NEXT: add a1, sp, a1 2219; RV64-NEXT: addi a1, a1, 16 2220; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 2221; RV64-NEXT: csrr a1, vlenb 2222; RV64-NEXT: srli a2, a1, 3 2223; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma 2224; RV64-NEXT: vslidedown.vx v24, v0, a2 2225; RV64-NEXT: mv a2, a0 2226; RV64-NEXT: bltu a0, a1, .LBB46_2 2227; RV64-NEXT: # %bb.1: 2228; RV64-NEXT: mv a2, a1 2229; RV64-NEXT: .LBB46_2: 2230; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2231; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2232; RV64-NEXT: lui a2, 349525 2233; RV64-NEXT: lui a3, 209715 2234; RV64-NEXT: lui a4, 61681 2235; RV64-NEXT: lui a5, 4112 2236; RV64-NEXT: addiw a2, a2, 1365 2237; RV64-NEXT: addiw a3, a3, 819 2238; RV64-NEXT: addiw a4, a4, -241 2239; RV64-NEXT: addiw a5, a5, 257 2240; RV64-NEXT: slli a6, a2, 32 2241; RV64-NEXT: add a6, a2, a6 2242; RV64-NEXT: slli a2, a3, 32 2243; RV64-NEXT: add a7, a3, a2 2244; RV64-NEXT: slli a2, a4, 32 2245; RV64-NEXT: add a2, a4, a2 2246; RV64-NEXT: slli a3, a5, 32 2247; RV64-NEXT: add a3, a5, a3 2248; RV64-NEXT: li a4, 56 2249; RV64-NEXT: sub a1, a0, a1 2250; RV64-NEXT: sltu a0, a0, a1 2251; RV64-NEXT: addi a0, a0, -1 2252; RV64-NEXT: and a0, a0, a1 2253; RV64-NEXT: vand.vx v16, v16, a6, v0.t 2254; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 2255; RV64-NEXT: vand.vx v16, v8, a7, v0.t 2256; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 2257; RV64-NEXT: vand.vx v8, v8, a7, v0.t 2258; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 2259; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2260; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2261; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2262; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 2263; RV64-NEXT: vsrl.vx v8, v8, a4, v0.t 2264; RV64-NEXT: addi a1, sp, 16 2265; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 2266; RV64-NEXT: vmv1r.v v0, v24 2267; RV64-NEXT: csrr a1, vlenb 2268; RV64-NEXT: slli a1, a1, 3 2269; RV64-NEXT: add a1, sp, a1 2270; RV64-NEXT: addi a1, a1, 16 2271; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 2272; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2273; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 2274; RV64-NEXT: vand.vx v16, v16, a6, v0.t 2275; RV64-NEXT: vsub.vv v16, v8, v16, v0.t 2276; RV64-NEXT: vand.vx v8, v16, a7, v0.t 2277; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t 2278; RV64-NEXT: vand.vx v16, v16, a7, v0.t 2279; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2280; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 2281; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 2282; RV64-NEXT: vand.vx v8, v8, a2, v0.t 2283; RV64-NEXT: vmul.vx v8, v8, a3, v0.t 2284; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t 2285; RV64-NEXT: addi a0, sp, 16 2286; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload 2287; RV64-NEXT: csrr a0, vlenb 2288; RV64-NEXT: slli a0, a0, 4 2289; RV64-NEXT: add sp, sp, a0 2290; RV64-NEXT: .cfi_def_cfa sp, 16 2291; RV64-NEXT: addi sp, sp, 16 2292; RV64-NEXT: .cfi_def_cfa_offset 0 2293; RV64-NEXT: ret 2294; 2295; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64: 2296; CHECK-ZVBB: # %bb.0: 2297; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma 2298; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 2299; CHECK-ZVBB-NEXT: csrr a1, vlenb 2300; CHECK-ZVBB-NEXT: srli a2, a1, 3 2301; CHECK-ZVBB-NEXT: sub a3, a0, a1 2302; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 2303; CHECK-ZVBB-NEXT: sltu a2, a0, a3 2304; CHECK-ZVBB-NEXT: addi a2, a2, -1 2305; CHECK-ZVBB-NEXT: and a2, a2, a3 2306; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2307; CHECK-ZVBB-NEXT: vcpop.v v16, v16, v0.t 2308; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB46_2 2309; CHECK-ZVBB-NEXT: # %bb.1: 2310; CHECK-ZVBB-NEXT: mv a0, a1 2311; CHECK-ZVBB-NEXT: .LBB46_2: 2312; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 2313; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2314; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 2315; CHECK-ZVBB-NEXT: ret 2316 %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl) 2317 ret <vscale x 16 x i64> %v 2318} 2319 2320define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { 2321; RV32-LABEL: vp_ctpop_nxv16i64_unmasked: 2322; RV32: # %bb.0: 2323; RV32-NEXT: addi sp, sp, -16 2324; RV32-NEXT: .cfi_def_cfa_offset 16 2325; RV32-NEXT: csrr a1, vlenb 2326; RV32-NEXT: slli a1, a1, 5 2327; RV32-NEXT: sub sp, sp, a1 2328; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb 2329; RV32-NEXT: csrr a1, vlenb 2330; RV32-NEXT: lui a2, 349525 2331; RV32-NEXT: lui a3, 209715 2332; RV32-NEXT: sub a4, a0, a1 2333; RV32-NEXT: addi a2, a2, 1365 2334; RV32-NEXT: addi a3, a3, 819 2335; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2336; RV32-NEXT: vmv.v.x v0, a2 2337; RV32-NEXT: sltu a2, a0, a4 2338; RV32-NEXT: addi a2, a2, -1 2339; RV32-NEXT: and a2, a2, a4 2340; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2341; RV32-NEXT: vsrl.vi v24, v16, 1 2342; RV32-NEXT: csrr a4, vlenb 2343; RV32-NEXT: li a5, 24 2344; RV32-NEXT: mul a4, a4, a5 2345; RV32-NEXT: add a4, sp, a4 2346; RV32-NEXT: addi a4, a4, 16 2347; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill 2348; RV32-NEXT: vand.vv v24, v24, v0 2349; RV32-NEXT: vsub.vv v16, v16, v24 2350; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma 2351; RV32-NEXT: vmv.v.x v0, a3 2352; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2353; RV32-NEXT: vand.vv v24, v16, v0 2354; RV32-NEXT: vsrl.vi v16, v16, 2 2355; RV32-NEXT: csrr a3, vlenb 2356; RV32-NEXT: slli a3, a3, 4 2357; RV32-NEXT: add a3, sp, a3 2358; RV32-NEXT: addi a3, a3, 16 2359; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill 2360; RV32-NEXT: vand.vv v16, v16, v0 2361; RV32-NEXT: vadd.vv v16, v24, v16 2362; RV32-NEXT: vsrl.vi v24, v16, 4 2363; RV32-NEXT: vadd.vv v16, v16, v24 2364; RV32-NEXT: lui a3, 61681 2365; RV32-NEXT: lui a4, 4112 2366; RV32-NEXT: addi a3, a3, -241 2367; RV32-NEXT: addi a4, a4, 257 2368; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma 2369; RV32-NEXT: vmv.v.x v24, a3 2370; RV32-NEXT: csrr a3, vlenb 2371; RV32-NEXT: slli a3, a3, 3 2372; RV32-NEXT: add a3, sp, a3 2373; RV32-NEXT: addi a3, a3, 16 2374; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill 2375; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2376; RV32-NEXT: vand.vv v16, v16, v24 2377; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma 2378; RV32-NEXT: vmv.v.x v24, a4 2379; RV32-NEXT: addi a3, sp, 16 2380; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill 2381; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2382; RV32-NEXT: vmul.vv v16, v16, v24 2383; RV32-NEXT: li a2, 56 2384; RV32-NEXT: vsrl.vx v16, v16, a2 2385; RV32-NEXT: bltu a0, a1, .LBB47_2 2386; RV32-NEXT: # %bb.1: 2387; RV32-NEXT: mv a0, a1 2388; RV32-NEXT: .LBB47_2: 2389; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2390; RV32-NEXT: vsrl.vi v24, v8, 1 2391; RV32-NEXT: csrr a0, vlenb 2392; RV32-NEXT: li a1, 24 2393; RV32-NEXT: mul a0, a0, a1 2394; RV32-NEXT: add a0, sp, a0 2395; RV32-NEXT: addi a0, a0, 16 2396; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 2397; RV32-NEXT: vand.vv v24, v24, v0 2398; RV32-NEXT: vsub.vv v8, v8, v24 2399; RV32-NEXT: csrr a0, vlenb 2400; RV32-NEXT: slli a0, a0, 4 2401; RV32-NEXT: add a0, sp, a0 2402; RV32-NEXT: addi a0, a0, 16 2403; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload 2404; RV32-NEXT: vand.vv v24, v8, v0 2405; RV32-NEXT: vsrl.vi v8, v8, 2 2406; RV32-NEXT: vand.vv v8, v8, v0 2407; RV32-NEXT: vadd.vv v8, v24, v8 2408; RV32-NEXT: vsrl.vi v24, v8, 4 2409; RV32-NEXT: vadd.vv v8, v8, v24 2410; RV32-NEXT: csrr a0, vlenb 2411; RV32-NEXT: slli a0, a0, 3 2412; RV32-NEXT: add a0, sp, a0 2413; RV32-NEXT: addi a0, a0, 16 2414; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2415; RV32-NEXT: vand.vv v8, v8, v24 2416; RV32-NEXT: addi a0, sp, 16 2417; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload 2418; RV32-NEXT: vmul.vv v8, v8, v24 2419; RV32-NEXT: vsrl.vx v8, v8, a2 2420; RV32-NEXT: csrr a0, vlenb 2421; RV32-NEXT: slli a0, a0, 5 2422; RV32-NEXT: add sp, sp, a0 2423; RV32-NEXT: .cfi_def_cfa sp, 16 2424; RV32-NEXT: addi sp, sp, 16 2425; RV32-NEXT: .cfi_def_cfa_offset 0 2426; RV32-NEXT: ret 2427; 2428; RV64-LABEL: vp_ctpop_nxv16i64_unmasked: 2429; RV64: # %bb.0: 2430; RV64-NEXT: csrr a2, vlenb 2431; RV64-NEXT: mv a1, a0 2432; RV64-NEXT: bltu a0, a2, .LBB47_2 2433; RV64-NEXT: # %bb.1: 2434; RV64-NEXT: mv a1, a2 2435; RV64-NEXT: .LBB47_2: 2436; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2437; RV64-NEXT: vsrl.vi v24, v8, 1 2438; RV64-NEXT: lui a3, 349525 2439; RV64-NEXT: lui a4, 209715 2440; RV64-NEXT: lui a5, 61681 2441; RV64-NEXT: lui a6, 4112 2442; RV64-NEXT: addiw a3, a3, 1365 2443; RV64-NEXT: addiw a4, a4, 819 2444; RV64-NEXT: addiw a5, a5, -241 2445; RV64-NEXT: addiw a6, a6, 257 2446; RV64-NEXT: slli a7, a3, 32 2447; RV64-NEXT: add a3, a3, a7 2448; RV64-NEXT: slli a7, a4, 32 2449; RV64-NEXT: add a4, a4, a7 2450; RV64-NEXT: slli a7, a5, 32 2451; RV64-NEXT: add a5, a5, a7 2452; RV64-NEXT: slli a7, a6, 32 2453; RV64-NEXT: add a6, a6, a7 2454; RV64-NEXT: li a7, 56 2455; RV64-NEXT: sub a2, a0, a2 2456; RV64-NEXT: sltu a0, a0, a2 2457; RV64-NEXT: addi a0, a0, -1 2458; RV64-NEXT: and a0, a0, a2 2459; RV64-NEXT: vand.vx v24, v24, a3 2460; RV64-NEXT: vsub.vv v8, v8, v24 2461; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2462; RV64-NEXT: vsrl.vi v24, v16, 1 2463; RV64-NEXT: vand.vx v24, v24, a3 2464; RV64-NEXT: vsub.vv v16, v16, v24 2465; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2466; RV64-NEXT: vand.vx v24, v8, a4 2467; RV64-NEXT: vsrl.vi v8, v8, 2 2468; RV64-NEXT: vand.vx v8, v8, a4 2469; RV64-NEXT: vadd.vv v8, v24, v8 2470; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2471; RV64-NEXT: vand.vx v24, v16, a4 2472; RV64-NEXT: vsrl.vi v16, v16, 2 2473; RV64-NEXT: vand.vx v16, v16, a4 2474; RV64-NEXT: vadd.vv v16, v24, v16 2475; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2476; RV64-NEXT: vsrl.vi v24, v8, 4 2477; RV64-NEXT: vadd.vv v8, v8, v24 2478; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2479; RV64-NEXT: vsrl.vi v24, v16, 4 2480; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2481; RV64-NEXT: vand.vx v8, v8, a5 2482; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2483; RV64-NEXT: vadd.vv v16, v16, v24 2484; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2485; RV64-NEXT: vmul.vx v8, v8, a6 2486; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2487; RV64-NEXT: vand.vx v16, v16, a5 2488; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma 2489; RV64-NEXT: vsrl.vx v8, v8, a7 2490; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2491; RV64-NEXT: vmul.vx v16, v16, a6 2492; RV64-NEXT: vsrl.vx v16, v16, a7 2493; RV64-NEXT: ret 2494; 2495; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked: 2496; CHECK-ZVBB: # %bb.0: 2497; CHECK-ZVBB-NEXT: csrr a1, vlenb 2498; CHECK-ZVBB-NEXT: sub a2, a0, a1 2499; CHECK-ZVBB-NEXT: sltu a3, a0, a2 2500; CHECK-ZVBB-NEXT: addi a3, a3, -1 2501; CHECK-ZVBB-NEXT: and a2, a3, a2 2502; CHECK-ZVBB-NEXT: vsetvli zero, a2, e64, m8, ta, ma 2503; CHECK-ZVBB-NEXT: vcpop.v v16, v16 2504; CHECK-ZVBB-NEXT: bltu a0, a1, .LBB47_2 2505; CHECK-ZVBB-NEXT: # %bb.1: 2506; CHECK-ZVBB-NEXT: mv a0, a1 2507; CHECK-ZVBB-NEXT: .LBB47_2: 2508; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma 2509; CHECK-ZVBB-NEXT: vcpop.v v8, v8 2510; CHECK-ZVBB-NEXT: ret 2511 %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) 2512 ret <vscale x 16 x i64> %v 2513} 2514 2515; Test promotion. 2516declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32) 2517 2518define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) { 2519; CHECK-LABEL: vp_ctpop_nxv1i9: 2520; CHECK: # %bb.0: 2521; CHECK-NEXT: li a1, 511 2522; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 2523; CHECK-NEXT: vand.vx v8, v8, a1, v0.t 2524; CHECK-NEXT: lui a0, 5 2525; CHECK-NEXT: addi a0, a0, 1365 2526; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 2527; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 2528; CHECK-NEXT: lui a0, 3 2529; CHECK-NEXT: addi a0, a0, 819 2530; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 2531; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 2532; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 2533; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2534; CHECK-NEXT: lui a0, 1 2535; CHECK-NEXT: addi a0, a0, -241 2536; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 2537; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 2538; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 2539; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 2540; CHECK-NEXT: li a0, 257 2541; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 2542; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 2543; CHECK-NEXT: ret 2544; 2545; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9: 2546; CHECK-ZVBB: # %bb.0: 2547; CHECK-ZVBB-NEXT: li a1, 511 2548; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 2549; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1, v0.t 2550; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t 2551; CHECK-ZVBB-NEXT: ret 2552 %v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl) 2553 ret <vscale x 1 x i9> %v 2554} 2555