1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 3; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 4; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 5; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB 6 7define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) { 8; CHECK-LABEL: ctpop_nxv1i8: 9; CHECK: # %bb.0: 10; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma 11; CHECK-NEXT: vsrl.vi v9, v8, 1 12; CHECK-NEXT: li a0, 85 13; CHECK-NEXT: vand.vx v9, v9, a0 14; CHECK-NEXT: li a0, 51 15; CHECK-NEXT: vsub.vv v8, v8, v9 16; CHECK-NEXT: vand.vx v9, v8, a0 17; CHECK-NEXT: vsrl.vi v8, v8, 2 18; CHECK-NEXT: vand.vx v8, v8, a0 19; CHECK-NEXT: vadd.vv v8, v9, v8 20; CHECK-NEXT: vsrl.vi v9, v8, 4 21; CHECK-NEXT: vadd.vv v8, v8, v9 22; CHECK-NEXT: vand.vi v8, v8, 15 23; CHECK-NEXT: ret 24; 25; CHECK-ZVBB-LABEL: ctpop_nxv1i8: 26; CHECK-ZVBB: # %bb.0: 27; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf8, ta, ma 28; CHECK-ZVBB-NEXT: vcpop.v v8, v8 29; CHECK-ZVBB-NEXT: ret 30 %a = call <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8> %va) 31 ret <vscale x 1 x i8> %a 32} 33declare <vscale x 1 x i8> @llvm.ctpop.nxv1i8(<vscale x 1 x i8>) 34 35define <vscale x 2 x i8> @ctpop_nxv2i8(<vscale x 2 x i8> %va) { 36; CHECK-LABEL: ctpop_nxv2i8: 37; CHECK: # %bb.0: 38; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma 39; CHECK-NEXT: vsrl.vi v9, v8, 1 40; CHECK-NEXT: li a0, 85 41; CHECK-NEXT: vand.vx v9, v9, a0 42; CHECK-NEXT: li a0, 51 43; CHECK-NEXT: vsub.vv v8, v8, v9 44; CHECK-NEXT: vand.vx v9, v8, a0 45; CHECK-NEXT: vsrl.vi v8, v8, 2 46; CHECK-NEXT: vand.vx v8, v8, a0 47; CHECK-NEXT: vadd.vv v8, v9, v8 48; CHECK-NEXT: vsrl.vi v9, v8, 4 49; CHECK-NEXT: vadd.vv v8, v8, v9 
50; CHECK-NEXT: vand.vi v8, v8, 15 51; CHECK-NEXT: ret 52; 53; CHECK-ZVBB-LABEL: ctpop_nxv2i8: 54; CHECK-ZVBB: # %bb.0: 55; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf4, ta, ma 56; CHECK-ZVBB-NEXT: vcpop.v v8, v8 57; CHECK-ZVBB-NEXT: ret 58 %a = call <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8> %va) 59 ret <vscale x 2 x i8> %a 60} 61declare <vscale x 2 x i8> @llvm.ctpop.nxv2i8(<vscale x 2 x i8>) 62 63define <vscale x 4 x i8> @ctpop_nxv4i8(<vscale x 4 x i8> %va) { 64; CHECK-LABEL: ctpop_nxv4i8: 65; CHECK: # %bb.0: 66; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma 67; CHECK-NEXT: vsrl.vi v9, v8, 1 68; CHECK-NEXT: li a0, 85 69; CHECK-NEXT: vand.vx v9, v9, a0 70; CHECK-NEXT: li a0, 51 71; CHECK-NEXT: vsub.vv v8, v8, v9 72; CHECK-NEXT: vand.vx v9, v8, a0 73; CHECK-NEXT: vsrl.vi v8, v8, 2 74; CHECK-NEXT: vand.vx v8, v8, a0 75; CHECK-NEXT: vadd.vv v8, v9, v8 76; CHECK-NEXT: vsrl.vi v9, v8, 4 77; CHECK-NEXT: vadd.vv v8, v8, v9 78; CHECK-NEXT: vand.vi v8, v8, 15 79; CHECK-NEXT: ret 80; 81; CHECK-ZVBB-LABEL: ctpop_nxv4i8: 82; CHECK-ZVBB: # %bb.0: 83; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma 84; CHECK-ZVBB-NEXT: vcpop.v v8, v8 85; CHECK-ZVBB-NEXT: ret 86 %a = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> %va) 87 ret <vscale x 4 x i8> %a 88} 89declare <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8>) 90 91define <vscale x 8 x i8> @ctpop_nxv8i8(<vscale x 8 x i8> %va) { 92; CHECK-LABEL: ctpop_nxv8i8: 93; CHECK: # %bb.0: 94; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma 95; CHECK-NEXT: vsrl.vi v9, v8, 1 96; CHECK-NEXT: li a0, 85 97; CHECK-NEXT: vand.vx v9, v9, a0 98; CHECK-NEXT: li a0, 51 99; CHECK-NEXT: vsub.vv v8, v8, v9 100; CHECK-NEXT: vand.vx v9, v8, a0 101; CHECK-NEXT: vsrl.vi v8, v8, 2 102; CHECK-NEXT: vand.vx v8, v8, a0 103; CHECK-NEXT: vadd.vv v8, v9, v8 104; CHECK-NEXT: vsrl.vi v9, v8, 4 105; CHECK-NEXT: vadd.vv v8, v8, v9 106; CHECK-NEXT: vand.vi v8, v8, 15 107; CHECK-NEXT: ret 108; 109; CHECK-ZVBB-LABEL: ctpop_nxv8i8: 110; 
CHECK-ZVBB: # %bb.0: 111; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma 112; CHECK-ZVBB-NEXT: vcpop.v v8, v8 113; CHECK-ZVBB-NEXT: ret 114 %a = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> %va) 115 ret <vscale x 8 x i8> %a 116} 117declare <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8>) 118 119define <vscale x 16 x i8> @ctpop_nxv16i8(<vscale x 16 x i8> %va) { 120; CHECK-LABEL: ctpop_nxv16i8: 121; CHECK: # %bb.0: 122; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma 123; CHECK-NEXT: vsrl.vi v10, v8, 1 124; CHECK-NEXT: li a0, 85 125; CHECK-NEXT: vand.vx v10, v10, a0 126; CHECK-NEXT: li a0, 51 127; CHECK-NEXT: vsub.vv v8, v8, v10 128; CHECK-NEXT: vand.vx v10, v8, a0 129; CHECK-NEXT: vsrl.vi v8, v8, 2 130; CHECK-NEXT: vand.vx v8, v8, a0 131; CHECK-NEXT: vadd.vv v8, v10, v8 132; CHECK-NEXT: vsrl.vi v10, v8, 4 133; CHECK-NEXT: vadd.vv v8, v8, v10 134; CHECK-NEXT: vand.vi v8, v8, 15 135; CHECK-NEXT: ret 136; 137; CHECK-ZVBB-LABEL: ctpop_nxv16i8: 138; CHECK-ZVBB: # %bb.0: 139; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma 140; CHECK-ZVBB-NEXT: vcpop.v v8, v8 141; CHECK-ZVBB-NEXT: ret 142 %a = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> %va) 143 ret <vscale x 16 x i8> %a 144} 145declare <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8>) 146 147define <vscale x 32 x i8> @ctpop_nxv32i8(<vscale x 32 x i8> %va) { 148; CHECK-LABEL: ctpop_nxv32i8: 149; CHECK: # %bb.0: 150; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma 151; CHECK-NEXT: vsrl.vi v12, v8, 1 152; CHECK-NEXT: li a0, 85 153; CHECK-NEXT: vand.vx v12, v12, a0 154; CHECK-NEXT: li a0, 51 155; CHECK-NEXT: vsub.vv v8, v8, v12 156; CHECK-NEXT: vand.vx v12, v8, a0 157; CHECK-NEXT: vsrl.vi v8, v8, 2 158; CHECK-NEXT: vand.vx v8, v8, a0 159; CHECK-NEXT: vadd.vv v8, v12, v8 160; CHECK-NEXT: vsrl.vi v12, v8, 4 161; CHECK-NEXT: vadd.vv v8, v8, v12 162; CHECK-NEXT: vand.vi v8, v8, 15 163; CHECK-NEXT: ret 164; 165; CHECK-ZVBB-LABEL: ctpop_nxv32i8: 166; CHECK-ZVBB: # %bb.0: 167; 
CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m4, ta, ma 168; CHECK-ZVBB-NEXT: vcpop.v v8, v8 169; CHECK-ZVBB-NEXT: ret 170 %a = call <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8> %va) 171 ret <vscale x 32 x i8> %a 172} 173declare <vscale x 32 x i8> @llvm.ctpop.nxv32i8(<vscale x 32 x i8>) 174 175define <vscale x 64 x i8> @ctpop_nxv64i8(<vscale x 64 x i8> %va) { 176; CHECK-LABEL: ctpop_nxv64i8: 177; CHECK: # %bb.0: 178; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma 179; CHECK-NEXT: vsrl.vi v16, v8, 1 180; CHECK-NEXT: li a0, 85 181; CHECK-NEXT: vand.vx v16, v16, a0 182; CHECK-NEXT: li a0, 51 183; CHECK-NEXT: vsub.vv v8, v8, v16 184; CHECK-NEXT: vand.vx v16, v8, a0 185; CHECK-NEXT: vsrl.vi v8, v8, 2 186; CHECK-NEXT: vand.vx v8, v8, a0 187; CHECK-NEXT: vadd.vv v8, v16, v8 188; CHECK-NEXT: vsrl.vi v16, v8, 4 189; CHECK-NEXT: vadd.vv v8, v8, v16 190; CHECK-NEXT: vand.vi v8, v8, 15 191; CHECK-NEXT: ret 192; 193; CHECK-ZVBB-LABEL: ctpop_nxv64i8: 194; CHECK-ZVBB: # %bb.0: 195; CHECK-ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma 196; CHECK-ZVBB-NEXT: vcpop.v v8, v8 197; CHECK-ZVBB-NEXT: ret 198 %a = call <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8> %va) 199 ret <vscale x 64 x i8> %a 200} 201declare <vscale x 64 x i8> @llvm.ctpop.nxv64i8(<vscale x 64 x i8>) 202 203define <vscale x 1 x i16> @ctpop_nxv1i16(<vscale x 1 x i16> %va) { 204; CHECK-LABEL: ctpop_nxv1i16: 205; CHECK: # %bb.0: 206; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 207; CHECK-NEXT: vsrl.vi v9, v8, 1 208; CHECK-NEXT: lui a0, 5 209; CHECK-NEXT: addi a0, a0, 1365 210; CHECK-NEXT: vand.vx v9, v9, a0 211; CHECK-NEXT: lui a0, 3 212; CHECK-NEXT: addi a0, a0, 819 213; CHECK-NEXT: vsub.vv v8, v8, v9 214; CHECK-NEXT: vand.vx v9, v8, a0 215; CHECK-NEXT: vsrl.vi v8, v8, 2 216; CHECK-NEXT: vand.vx v8, v8, a0 217; CHECK-NEXT: lui a0, 1 218; CHECK-NEXT: addi a0, a0, -241 219; CHECK-NEXT: vadd.vv v8, v9, v8 220; CHECK-NEXT: vsrl.vi v9, v8, 4 221; CHECK-NEXT: vadd.vv v8, v8, v9 222; CHECK-NEXT: vand.vx v8, 
v8, a0 223; CHECK-NEXT: li a0, 257 224; CHECK-NEXT: vmul.vx v8, v8, a0 225; CHECK-NEXT: vsrl.vi v8, v8, 8 226; CHECK-NEXT: ret 227; 228; CHECK-ZVBB-LABEL: ctpop_nxv1i16: 229; CHECK-ZVBB: # %bb.0: 230; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf4, ta, ma 231; CHECK-ZVBB-NEXT: vcpop.v v8, v8 232; CHECK-ZVBB-NEXT: ret 233 %a = call <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16> %va) 234 ret <vscale x 1 x i16> %a 235} 236declare <vscale x 1 x i16> @llvm.ctpop.nxv1i16(<vscale x 1 x i16>) 237 238define <vscale x 2 x i16> @ctpop_nxv2i16(<vscale x 2 x i16> %va) { 239; CHECK-LABEL: ctpop_nxv2i16: 240; CHECK: # %bb.0: 241; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 242; CHECK-NEXT: vsrl.vi v9, v8, 1 243; CHECK-NEXT: lui a0, 5 244; CHECK-NEXT: addi a0, a0, 1365 245; CHECK-NEXT: vand.vx v9, v9, a0 246; CHECK-NEXT: lui a0, 3 247; CHECK-NEXT: addi a0, a0, 819 248; CHECK-NEXT: vsub.vv v8, v8, v9 249; CHECK-NEXT: vand.vx v9, v8, a0 250; CHECK-NEXT: vsrl.vi v8, v8, 2 251; CHECK-NEXT: vand.vx v8, v8, a0 252; CHECK-NEXT: lui a0, 1 253; CHECK-NEXT: addi a0, a0, -241 254; CHECK-NEXT: vadd.vv v8, v9, v8 255; CHECK-NEXT: vsrl.vi v9, v8, 4 256; CHECK-NEXT: vadd.vv v8, v8, v9 257; CHECK-NEXT: vand.vx v8, v8, a0 258; CHECK-NEXT: li a0, 257 259; CHECK-NEXT: vmul.vx v8, v8, a0 260; CHECK-NEXT: vsrl.vi v8, v8, 8 261; CHECK-NEXT: ret 262; 263; CHECK-ZVBB-LABEL: ctpop_nxv2i16: 264; CHECK-ZVBB: # %bb.0: 265; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma 266; CHECK-ZVBB-NEXT: vcpop.v v8, v8 267; CHECK-ZVBB-NEXT: ret 268 %a = call <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16> %va) 269 ret <vscale x 2 x i16> %a 270} 271declare <vscale x 2 x i16> @llvm.ctpop.nxv2i16(<vscale x 2 x i16>) 272 273define <vscale x 4 x i16> @ctpop_nxv4i16(<vscale x 4 x i16> %va) { 274; CHECK-LABEL: ctpop_nxv4i16: 275; CHECK: # %bb.0: 276; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma 277; CHECK-NEXT: vsrl.vi v9, v8, 1 278; CHECK-NEXT: lui a0, 5 279; CHECK-NEXT: addi a0, a0, 1365 280; 
CHECK-NEXT: vand.vx v9, v9, a0 281; CHECK-NEXT: lui a0, 3 282; CHECK-NEXT: addi a0, a0, 819 283; CHECK-NEXT: vsub.vv v8, v8, v9 284; CHECK-NEXT: vand.vx v9, v8, a0 285; CHECK-NEXT: vsrl.vi v8, v8, 2 286; CHECK-NEXT: vand.vx v8, v8, a0 287; CHECK-NEXT: lui a0, 1 288; CHECK-NEXT: addi a0, a0, -241 289; CHECK-NEXT: vadd.vv v8, v9, v8 290; CHECK-NEXT: vsrl.vi v9, v8, 4 291; CHECK-NEXT: vadd.vv v8, v8, v9 292; CHECK-NEXT: vand.vx v8, v8, a0 293; CHECK-NEXT: li a0, 257 294; CHECK-NEXT: vmul.vx v8, v8, a0 295; CHECK-NEXT: vsrl.vi v8, v8, 8 296; CHECK-NEXT: ret 297; 298; CHECK-ZVBB-LABEL: ctpop_nxv4i16: 299; CHECK-ZVBB: # %bb.0: 300; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma 301; CHECK-ZVBB-NEXT: vcpop.v v8, v8 302; CHECK-ZVBB-NEXT: ret 303 %a = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> %va) 304 ret <vscale x 4 x i16> %a 305} 306declare <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16>) 307 308define <vscale x 8 x i16> @ctpop_nxv8i16(<vscale x 8 x i16> %va) { 309; CHECK-LABEL: ctpop_nxv8i16: 310; CHECK: # %bb.0: 311; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma 312; CHECK-NEXT: vsrl.vi v10, v8, 1 313; CHECK-NEXT: lui a0, 5 314; CHECK-NEXT: addi a0, a0, 1365 315; CHECK-NEXT: vand.vx v10, v10, a0 316; CHECK-NEXT: lui a0, 3 317; CHECK-NEXT: addi a0, a0, 819 318; CHECK-NEXT: vsub.vv v8, v8, v10 319; CHECK-NEXT: vand.vx v10, v8, a0 320; CHECK-NEXT: vsrl.vi v8, v8, 2 321; CHECK-NEXT: vand.vx v8, v8, a0 322; CHECK-NEXT: lui a0, 1 323; CHECK-NEXT: addi a0, a0, -241 324; CHECK-NEXT: vadd.vv v8, v10, v8 325; CHECK-NEXT: vsrl.vi v10, v8, 4 326; CHECK-NEXT: vadd.vv v8, v8, v10 327; CHECK-NEXT: vand.vx v8, v8, a0 328; CHECK-NEXT: li a0, 257 329; CHECK-NEXT: vmul.vx v8, v8, a0 330; CHECK-NEXT: vsrl.vi v8, v8, 8 331; CHECK-NEXT: ret 332; 333; CHECK-ZVBB-LABEL: ctpop_nxv8i16: 334; CHECK-ZVBB: # %bb.0: 335; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma 336; CHECK-ZVBB-NEXT: vcpop.v v8, v8 337; CHECK-ZVBB-NEXT: ret 338 %a = call <vscale x 8 x i16> 
@llvm.ctpop.nxv8i16(<vscale x 8 x i16> %va) 339 ret <vscale x 8 x i16> %a 340} 341declare <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16>) 342 343define <vscale x 16 x i16> @ctpop_nxv16i16(<vscale x 16 x i16> %va) { 344; CHECK-LABEL: ctpop_nxv16i16: 345; CHECK: # %bb.0: 346; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma 347; CHECK-NEXT: vsrl.vi v12, v8, 1 348; CHECK-NEXT: lui a0, 5 349; CHECK-NEXT: addi a0, a0, 1365 350; CHECK-NEXT: vand.vx v12, v12, a0 351; CHECK-NEXT: lui a0, 3 352; CHECK-NEXT: addi a0, a0, 819 353; CHECK-NEXT: vsub.vv v8, v8, v12 354; CHECK-NEXT: vand.vx v12, v8, a0 355; CHECK-NEXT: vsrl.vi v8, v8, 2 356; CHECK-NEXT: vand.vx v8, v8, a0 357; CHECK-NEXT: lui a0, 1 358; CHECK-NEXT: addi a0, a0, -241 359; CHECK-NEXT: vadd.vv v8, v12, v8 360; CHECK-NEXT: vsrl.vi v12, v8, 4 361; CHECK-NEXT: vadd.vv v8, v8, v12 362; CHECK-NEXT: vand.vx v8, v8, a0 363; CHECK-NEXT: li a0, 257 364; CHECK-NEXT: vmul.vx v8, v8, a0 365; CHECK-NEXT: vsrl.vi v8, v8, 8 366; CHECK-NEXT: ret 367; 368; CHECK-ZVBB-LABEL: ctpop_nxv16i16: 369; CHECK-ZVBB: # %bb.0: 370; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma 371; CHECK-ZVBB-NEXT: vcpop.v v8, v8 372; CHECK-ZVBB-NEXT: ret 373 %a = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> %va) 374 ret <vscale x 16 x i16> %a 375} 376declare <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16>) 377 378define <vscale x 32 x i16> @ctpop_nxv32i16(<vscale x 32 x i16> %va) { 379; CHECK-LABEL: ctpop_nxv32i16: 380; CHECK: # %bb.0: 381; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma 382; CHECK-NEXT: vsrl.vi v16, v8, 1 383; CHECK-NEXT: lui a0, 5 384; CHECK-NEXT: addi a0, a0, 1365 385; CHECK-NEXT: vand.vx v16, v16, a0 386; CHECK-NEXT: lui a0, 3 387; CHECK-NEXT: addi a0, a0, 819 388; CHECK-NEXT: vsub.vv v8, v8, v16 389; CHECK-NEXT: vand.vx v16, v8, a0 390; CHECK-NEXT: vsrl.vi v8, v8, 2 391; CHECK-NEXT: vand.vx v8, v8, a0 392; CHECK-NEXT: lui a0, 1 393; CHECK-NEXT: addi a0, a0, -241 394; CHECK-NEXT: vadd.vv v8, 
v16, v8 395; CHECK-NEXT: vsrl.vi v16, v8, 4 396; CHECK-NEXT: vadd.vv v8, v8, v16 397; CHECK-NEXT: vand.vx v8, v8, a0 398; CHECK-NEXT: li a0, 257 399; CHECK-NEXT: vmul.vx v8, v8, a0 400; CHECK-NEXT: vsrl.vi v8, v8, 8 401; CHECK-NEXT: ret 402; 403; CHECK-ZVBB-LABEL: ctpop_nxv32i16: 404; CHECK-ZVBB: # %bb.0: 405; CHECK-ZVBB-NEXT: vsetvli a0, zero, e16, m8, ta, ma 406; CHECK-ZVBB-NEXT: vcpop.v v8, v8 407; CHECK-ZVBB-NEXT: ret 408 %a = call <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16> %va) 409 ret <vscale x 32 x i16> %a 410} 411declare <vscale x 32 x i16> @llvm.ctpop.nxv32i16(<vscale x 32 x i16>) 412 413define <vscale x 1 x i32> @ctpop_nxv1i32(<vscale x 1 x i32> %va) { 414; CHECK-LABEL: ctpop_nxv1i32: 415; CHECK: # %bb.0: 416; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma 417; CHECK-NEXT: vsrl.vi v9, v8, 1 418; CHECK-NEXT: lui a0, 349525 419; CHECK-NEXT: addi a0, a0, 1365 420; CHECK-NEXT: vand.vx v9, v9, a0 421; CHECK-NEXT: lui a0, 209715 422; CHECK-NEXT: addi a0, a0, 819 423; CHECK-NEXT: vsub.vv v8, v8, v9 424; CHECK-NEXT: vand.vx v9, v8, a0 425; CHECK-NEXT: vsrl.vi v8, v8, 2 426; CHECK-NEXT: vand.vx v8, v8, a0 427; CHECK-NEXT: lui a0, 61681 428; CHECK-NEXT: addi a0, a0, -241 429; CHECK-NEXT: vadd.vv v8, v9, v8 430; CHECK-NEXT: vsrl.vi v9, v8, 4 431; CHECK-NEXT: vadd.vv v8, v8, v9 432; CHECK-NEXT: vand.vx v8, v8, a0 433; CHECK-NEXT: lui a0, 4112 434; CHECK-NEXT: addi a0, a0, 257 435; CHECK-NEXT: vmul.vx v8, v8, a0 436; CHECK-NEXT: vsrl.vi v8, v8, 24 437; CHECK-NEXT: ret 438; 439; CHECK-ZVBB-LABEL: ctpop_nxv1i32: 440; CHECK-ZVBB: # %bb.0: 441; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, mf2, ta, ma 442; CHECK-ZVBB-NEXT: vcpop.v v8, v8 443; CHECK-ZVBB-NEXT: ret 444 %a = call <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32> %va) 445 ret <vscale x 1 x i32> %a 446} 447declare <vscale x 1 x i32> @llvm.ctpop.nxv1i32(<vscale x 1 x i32>) 448 449define <vscale x 2 x i32> @ctpop_nxv2i32(<vscale x 2 x i32> %va) { 450; CHECK-LABEL: ctpop_nxv2i32: 451; CHECK: # 
%bb.0: 452; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma 453; CHECK-NEXT: vsrl.vi v9, v8, 1 454; CHECK-NEXT: lui a0, 349525 455; CHECK-NEXT: addi a0, a0, 1365 456; CHECK-NEXT: vand.vx v9, v9, a0 457; CHECK-NEXT: lui a0, 209715 458; CHECK-NEXT: addi a0, a0, 819 459; CHECK-NEXT: vsub.vv v8, v8, v9 460; CHECK-NEXT: vand.vx v9, v8, a0 461; CHECK-NEXT: vsrl.vi v8, v8, 2 462; CHECK-NEXT: vand.vx v8, v8, a0 463; CHECK-NEXT: lui a0, 61681 464; CHECK-NEXT: addi a0, a0, -241 465; CHECK-NEXT: vadd.vv v8, v9, v8 466; CHECK-NEXT: vsrl.vi v9, v8, 4 467; CHECK-NEXT: vadd.vv v8, v8, v9 468; CHECK-NEXT: vand.vx v8, v8, a0 469; CHECK-NEXT: lui a0, 4112 470; CHECK-NEXT: addi a0, a0, 257 471; CHECK-NEXT: vmul.vx v8, v8, a0 472; CHECK-NEXT: vsrl.vi v8, v8, 24 473; CHECK-NEXT: ret 474; 475; CHECK-ZVBB-LABEL: ctpop_nxv2i32: 476; CHECK-ZVBB: # %bb.0: 477; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma 478; CHECK-ZVBB-NEXT: vcpop.v v8, v8 479; CHECK-ZVBB-NEXT: ret 480 %a = call <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32> %va) 481 ret <vscale x 2 x i32> %a 482} 483declare <vscale x 2 x i32> @llvm.ctpop.nxv2i32(<vscale x 2 x i32>) 484 485define <vscale x 4 x i32> @ctpop_nxv4i32(<vscale x 4 x i32> %va) { 486; CHECK-LABEL: ctpop_nxv4i32: 487; CHECK: # %bb.0: 488; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma 489; CHECK-NEXT: vsrl.vi v10, v8, 1 490; CHECK-NEXT: lui a0, 349525 491; CHECK-NEXT: addi a0, a0, 1365 492; CHECK-NEXT: vand.vx v10, v10, a0 493; CHECK-NEXT: lui a0, 209715 494; CHECK-NEXT: addi a0, a0, 819 495; CHECK-NEXT: vsub.vv v8, v8, v10 496; CHECK-NEXT: vand.vx v10, v8, a0 497; CHECK-NEXT: vsrl.vi v8, v8, 2 498; CHECK-NEXT: vand.vx v8, v8, a0 499; CHECK-NEXT: lui a0, 61681 500; CHECK-NEXT: addi a0, a0, -241 501; CHECK-NEXT: vadd.vv v8, v10, v8 502; CHECK-NEXT: vsrl.vi v10, v8, 4 503; CHECK-NEXT: vadd.vv v8, v8, v10 504; CHECK-NEXT: vand.vx v8, v8, a0 505; CHECK-NEXT: lui a0, 4112 506; CHECK-NEXT: addi a0, a0, 257 507; CHECK-NEXT: vmul.vx v8, v8, a0 508; 
CHECK-NEXT: vsrl.vi v8, v8, 24 509; CHECK-NEXT: ret 510; 511; CHECK-ZVBB-LABEL: ctpop_nxv4i32: 512; CHECK-ZVBB: # %bb.0: 513; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m2, ta, ma 514; CHECK-ZVBB-NEXT: vcpop.v v8, v8 515; CHECK-ZVBB-NEXT: ret 516 %a = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> %va) 517 ret <vscale x 4 x i32> %a 518} 519declare <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32>) 520 521define <vscale x 8 x i32> @ctpop_nxv8i32(<vscale x 8 x i32> %va) { 522; CHECK-LABEL: ctpop_nxv8i32: 523; CHECK: # %bb.0: 524; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma 525; CHECK-NEXT: vsrl.vi v12, v8, 1 526; CHECK-NEXT: lui a0, 349525 527; CHECK-NEXT: addi a0, a0, 1365 528; CHECK-NEXT: vand.vx v12, v12, a0 529; CHECK-NEXT: lui a0, 209715 530; CHECK-NEXT: addi a0, a0, 819 531; CHECK-NEXT: vsub.vv v8, v8, v12 532; CHECK-NEXT: vand.vx v12, v8, a0 533; CHECK-NEXT: vsrl.vi v8, v8, 2 534; CHECK-NEXT: vand.vx v8, v8, a0 535; CHECK-NEXT: lui a0, 61681 536; CHECK-NEXT: addi a0, a0, -241 537; CHECK-NEXT: vadd.vv v8, v12, v8 538; CHECK-NEXT: vsrl.vi v12, v8, 4 539; CHECK-NEXT: vadd.vv v8, v8, v12 540; CHECK-NEXT: vand.vx v8, v8, a0 541; CHECK-NEXT: lui a0, 4112 542; CHECK-NEXT: addi a0, a0, 257 543; CHECK-NEXT: vmul.vx v8, v8, a0 544; CHECK-NEXT: vsrl.vi v8, v8, 24 545; CHECK-NEXT: ret 546; 547; CHECK-ZVBB-LABEL: ctpop_nxv8i32: 548; CHECK-ZVBB: # %bb.0: 549; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m4, ta, ma 550; CHECK-ZVBB-NEXT: vcpop.v v8, v8 551; CHECK-ZVBB-NEXT: ret 552 %a = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> %va) 553 ret <vscale x 8 x i32> %a 554} 555declare <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32>) 556 557define <vscale x 16 x i32> @ctpop_nxv16i32(<vscale x 16 x i32> %va) { 558; CHECK-LABEL: ctpop_nxv16i32: 559; CHECK: # %bb.0: 560; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 561; CHECK-NEXT: vsrl.vi v16, v8, 1 562; CHECK-NEXT: lui a0, 349525 563; CHECK-NEXT: addi a0, a0, 1365 564; CHECK-NEXT: 
vand.vx v16, v16, a0 565; CHECK-NEXT: lui a0, 209715 566; CHECK-NEXT: addi a0, a0, 819 567; CHECK-NEXT: vsub.vv v8, v8, v16 568; CHECK-NEXT: vand.vx v16, v8, a0 569; CHECK-NEXT: vsrl.vi v8, v8, 2 570; CHECK-NEXT: vand.vx v8, v8, a0 571; CHECK-NEXT: lui a0, 61681 572; CHECK-NEXT: addi a0, a0, -241 573; CHECK-NEXT: vadd.vv v8, v16, v8 574; CHECK-NEXT: vsrl.vi v16, v8, 4 575; CHECK-NEXT: vadd.vv v8, v8, v16 576; CHECK-NEXT: vand.vx v8, v8, a0 577; CHECK-NEXT: lui a0, 4112 578; CHECK-NEXT: addi a0, a0, 257 579; CHECK-NEXT: vmul.vx v8, v8, a0 580; CHECK-NEXT: vsrl.vi v8, v8, 24 581; CHECK-NEXT: ret 582; 583; CHECK-ZVBB-LABEL: ctpop_nxv16i32: 584; CHECK-ZVBB: # %bb.0: 585; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma 586; CHECK-ZVBB-NEXT: vcpop.v v8, v8 587; CHECK-ZVBB-NEXT: ret 588 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va) 589 ret <vscale x 16 x i32> %a 590} 591 592; We always emit vcpop.v for the scalable vector 593define <vscale x 16 x i1> @ctpop_nxv16i32_ult_two(<vscale x 16 x i32> %va) { 594; CHECK-LABEL: ctpop_nxv16i32_ult_two: 595; CHECK: # %bb.0: 596; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 597; CHECK-NEXT: vadd.vi v16, v8, -1 598; CHECK-NEXT: vand.vv v8, v8, v16 599; CHECK-NEXT: vmseq.vi v0, v8, 0 600; CHECK-NEXT: ret 601; 602; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ult_two: 603; CHECK-ZVBB: # %bb.0: 604; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma 605; CHECK-ZVBB-NEXT: vcpop.v v8, v8 606; CHECK-ZVBB-NEXT: vmsleu.vi v0, v8, 1 607; CHECK-ZVBB-NEXT: ret 608 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va) 609 %cmp = icmp ult <vscale x 16 x i32> %a, splat (i32 2) 610 ret <vscale x 16 x i1> %cmp 611} 612 613define <vscale x 16 x i1> @ctpop_nxv16i32_ugt_one(<vscale x 16 x i32> %va) { 614; CHECK-LABEL: ctpop_nxv16i32_ugt_one: 615; CHECK: # %bb.0: 616; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 617; CHECK-NEXT: vadd.vi v16, v8, -1 618; CHECK-NEXT: vand.vv v8, v8, v16 619; CHECK-NEXT: vmsne.vi 
v0, v8, 0 620; CHECK-NEXT: ret 621; 622; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ugt_one: 623; CHECK-ZVBB: # %bb.0: 624; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma 625; CHECK-ZVBB-NEXT: vcpop.v v8, v8 626; CHECK-ZVBB-NEXT: vmsgtu.vi v0, v8, 1 627; CHECK-ZVBB-NEXT: ret 628 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va) 629 %cmp = icmp ugt <vscale x 16 x i32> %a, splat (i32 1) 630 ret <vscale x 16 x i1> %cmp 631} 632 633define <vscale x 16 x i1> @ctpop_nxv16i32_eq_one(<vscale x 16 x i32> %va) { 634; CHECK-LABEL: ctpop_nxv16i32_eq_one: 635; CHECK: # %bb.0: 636; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 637; CHECK-NEXT: vadd.vi v16, v8, -1 638; CHECK-NEXT: vxor.vv v8, v8, v16 639; CHECK-NEXT: vmsltu.vv v0, v16, v8 640; CHECK-NEXT: ret 641; 642; CHECK-ZVBB-LABEL: ctpop_nxv16i32_eq_one: 643; CHECK-ZVBB: # %bb.0: 644; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma 645; CHECK-ZVBB-NEXT: vcpop.v v8, v8 646; CHECK-ZVBB-NEXT: vmseq.vi v0, v8, 1 647; CHECK-ZVBB-NEXT: ret 648 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va) 649 %cmp = icmp eq <vscale x 16 x i32> %a, splat (i32 1) 650 ret <vscale x 16 x i1> %cmp 651} 652 653define <vscale x 16 x i1> @ctpop_nxv16i32_ne_one(<vscale x 16 x i32> %va) { 654; CHECK-LABEL: ctpop_nxv16i32_ne_one: 655; CHECK: # %bb.0: 656; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma 657; CHECK-NEXT: vadd.vi v16, v8, -1 658; CHECK-NEXT: vxor.vv v8, v8, v16 659; CHECK-NEXT: vmsleu.vv v0, v8, v16 660; CHECK-NEXT: ret 661; 662; CHECK-ZVBB-LABEL: ctpop_nxv16i32_ne_one: 663; CHECK-ZVBB: # %bb.0: 664; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m8, ta, ma 665; CHECK-ZVBB-NEXT: vcpop.v v8, v8 666; CHECK-ZVBB-NEXT: vmsne.vi v0, v8, 1 667; CHECK-ZVBB-NEXT: ret 668 %a = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> %va) 669 %cmp = icmp ne <vscale x 16 x i32> %a, splat (i32 1) 670 ret <vscale x 16 x i1> %cmp 671} 672 673declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x 
i32>) 674 675define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) { 676; RV32-LABEL: ctpop_nxv1i64: 677; RV32: # %bb.0: 678; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma 679; RV32-NEXT: vsrl.vi v9, v8, 1 680; RV32-NEXT: lui a0, 349525 681; RV32-NEXT: addi a0, a0, 1365 682; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 683; RV32-NEXT: vmv.v.x v10, a0 684; RV32-NEXT: lui a0, 209715 685; RV32-NEXT: addi a0, a0, 819 686; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma 687; RV32-NEXT: vand.vv v9, v9, v10 688; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 689; RV32-NEXT: vmv.v.x v10, a0 690; RV32-NEXT: lui a0, 61681 691; RV32-NEXT: addi a0, a0, -241 692; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma 693; RV32-NEXT: vsub.vv v8, v8, v9 694; RV32-NEXT: vand.vv v9, v8, v10 695; RV32-NEXT: vsrl.vi v8, v8, 2 696; RV32-NEXT: vand.vv v8, v8, v10 697; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 698; RV32-NEXT: vmv.v.x v10, a0 699; RV32-NEXT: lui a0, 4112 700; RV32-NEXT: addi a0, a0, 257 701; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma 702; RV32-NEXT: vadd.vv v8, v9, v8 703; RV32-NEXT: vsrl.vi v9, v8, 4 704; RV32-NEXT: vadd.vv v8, v8, v9 705; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma 706; RV32-NEXT: vmv.v.x v9, a0 707; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma 708; RV32-NEXT: vand.vv v8, v8, v10 709; RV32-NEXT: vmul.vv v8, v8, v9 710; RV32-NEXT: li a0, 56 711; RV32-NEXT: vsrl.vx v8, v8, a0 712; RV32-NEXT: ret 713; 714; RV64-LABEL: ctpop_nxv1i64: 715; RV64: # %bb.0: 716; RV64-NEXT: lui a0, 349525 717; RV64-NEXT: lui a1, 209715 718; RV64-NEXT: lui a2, 61681 719; RV64-NEXT: lui a3, 4112 720; RV64-NEXT: addiw a0, a0, 1365 721; RV64-NEXT: addiw a1, a1, 819 722; RV64-NEXT: addiw a2, a2, -241 723; RV64-NEXT: addiw a3, a3, 257 724; RV64-NEXT: slli a4, a0, 32 725; RV64-NEXT: add a0, a0, a4 726; RV64-NEXT: slli a4, a1, 32 727; RV64-NEXT: add a1, a1, a4 728; RV64-NEXT: slli a4, a2, 32 729; RV64-NEXT: add a2, a2, a4 730; RV64-NEXT: slli a4, a3, 32 731; RV64-NEXT: add a3, a3, a4 
732; RV64-NEXT: vsetvli a4, zero, e64, m1, ta, ma 733; RV64-NEXT: vsrl.vi v9, v8, 1 734; RV64-NEXT: vand.vx v9, v9, a0 735; RV64-NEXT: vsub.vv v8, v8, v9 736; RV64-NEXT: vand.vx v9, v8, a1 737; RV64-NEXT: vsrl.vi v8, v8, 2 738; RV64-NEXT: vand.vx v8, v8, a1 739; RV64-NEXT: vadd.vv v8, v9, v8 740; RV64-NEXT: vsrl.vi v9, v8, 4 741; RV64-NEXT: vadd.vv v8, v8, v9 742; RV64-NEXT: vand.vx v8, v8, a2 743; RV64-NEXT: vmul.vx v8, v8, a3 744; RV64-NEXT: li a0, 56 745; RV64-NEXT: vsrl.vx v8, v8, a0 746; RV64-NEXT: ret 747; 748; CHECK-ZVBB-LABEL: ctpop_nxv1i64: 749; CHECK-ZVBB: # %bb.0: 750; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m1, ta, ma 751; CHECK-ZVBB-NEXT: vcpop.v v8, v8 752; CHECK-ZVBB-NEXT: ret 753 %a = call <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64> %va) 754 ret <vscale x 1 x i64> %a 755} 756declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>) 757 758define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) { 759; RV32-LABEL: ctpop_nxv2i64: 760; RV32: # %bb.0: 761; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma 762; RV32-NEXT: vsrl.vi v10, v8, 1 763; RV32-NEXT: lui a0, 349525 764; RV32-NEXT: addi a0, a0, 1365 765; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 766; RV32-NEXT: vmv.v.x v12, a0 767; RV32-NEXT: lui a0, 209715 768; RV32-NEXT: addi a0, a0, 819 769; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma 770; RV32-NEXT: vand.vv v10, v10, v12 771; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 772; RV32-NEXT: vmv.v.x v12, a0 773; RV32-NEXT: lui a0, 61681 774; RV32-NEXT: addi a0, a0, -241 775; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma 776; RV32-NEXT: vsub.vv v8, v8, v10 777; RV32-NEXT: vand.vv v10, v8, v12 778; RV32-NEXT: vsrl.vi v8, v8, 2 779; RV32-NEXT: vand.vv v8, v8, v12 780; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 781; RV32-NEXT: vmv.v.x v12, a0 782; RV32-NEXT: lui a0, 4112 783; RV32-NEXT: addi a0, a0, 257 784; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma 785; RV32-NEXT: vadd.vv v8, v10, v8 786; RV32-NEXT: vsrl.vi v10, v8, 4 
787; RV32-NEXT: vadd.vv v8, v8, v10 788; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma 789; RV32-NEXT: vmv.v.x v10, a0 790; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma 791; RV32-NEXT: vand.vv v8, v8, v12 792; RV32-NEXT: vmul.vv v8, v8, v10 793; RV32-NEXT: li a0, 56 794; RV32-NEXT: vsrl.vx v8, v8, a0 795; RV32-NEXT: ret 796; 797; RV64-LABEL: ctpop_nxv2i64: 798; RV64: # %bb.0: 799; RV64-NEXT: lui a0, 349525 800; RV64-NEXT: lui a1, 209715 801; RV64-NEXT: lui a2, 61681 802; RV64-NEXT: lui a3, 4112 803; RV64-NEXT: addiw a0, a0, 1365 804; RV64-NEXT: addiw a1, a1, 819 805; RV64-NEXT: addiw a2, a2, -241 806; RV64-NEXT: addiw a3, a3, 257 807; RV64-NEXT: slli a4, a0, 32 808; RV64-NEXT: add a0, a0, a4 809; RV64-NEXT: slli a4, a1, 32 810; RV64-NEXT: add a1, a1, a4 811; RV64-NEXT: slli a4, a2, 32 812; RV64-NEXT: add a2, a2, a4 813; RV64-NEXT: slli a4, a3, 32 814; RV64-NEXT: add a3, a3, a4 815; RV64-NEXT: vsetvli a4, zero, e64, m2, ta, ma 816; RV64-NEXT: vsrl.vi v10, v8, 1 817; RV64-NEXT: vand.vx v10, v10, a0 818; RV64-NEXT: vsub.vv v8, v8, v10 819; RV64-NEXT: vand.vx v10, v8, a1 820; RV64-NEXT: vsrl.vi v8, v8, 2 821; RV64-NEXT: vand.vx v8, v8, a1 822; RV64-NEXT: vadd.vv v8, v10, v8 823; RV64-NEXT: vsrl.vi v10, v8, 4 824; RV64-NEXT: vadd.vv v8, v8, v10 825; RV64-NEXT: vand.vx v8, v8, a2 826; RV64-NEXT: vmul.vx v8, v8, a3 827; RV64-NEXT: li a0, 56 828; RV64-NEXT: vsrl.vx v8, v8, a0 829; RV64-NEXT: ret 830; 831; CHECK-ZVBB-LABEL: ctpop_nxv2i64: 832; CHECK-ZVBB: # %bb.0: 833; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma 834; CHECK-ZVBB-NEXT: vcpop.v v8, v8 835; CHECK-ZVBB-NEXT: ret 836 %a = call <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64> %va) 837 ret <vscale x 2 x i64> %a 838} 839declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>) 840 841define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) { 842; RV32-LABEL: ctpop_nxv4i64: 843; RV32: # %bb.0: 844; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 845; RV32-NEXT: vsrl.vi v12, v8, 1 846; 
RV32-NEXT: lui a0, 349525 847; RV32-NEXT: addi a0, a0, 1365 848; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 849; RV32-NEXT: vmv.v.x v16, a0 850; RV32-NEXT: lui a0, 209715 851; RV32-NEXT: addi a0, a0, 819 852; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma 853; RV32-NEXT: vand.vv v12, v12, v16 854; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 855; RV32-NEXT: vmv.v.x v16, a0 856; RV32-NEXT: lui a0, 61681 857; RV32-NEXT: addi a0, a0, -241 858; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma 859; RV32-NEXT: vsub.vv v8, v8, v12 860; RV32-NEXT: vand.vv v12, v8, v16 861; RV32-NEXT: vsrl.vi v8, v8, 2 862; RV32-NEXT: vand.vv v8, v8, v16 863; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 864; RV32-NEXT: vmv.v.x v16, a0 865; RV32-NEXT: lui a0, 4112 866; RV32-NEXT: addi a0, a0, 257 867; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma 868; RV32-NEXT: vadd.vv v8, v12, v8 869; RV32-NEXT: vsrl.vi v12, v8, 4 870; RV32-NEXT: vadd.vv v8, v8, v12 871; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma 872; RV32-NEXT: vmv.v.x v12, a0 873; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma 874; RV32-NEXT: vand.vv v8, v8, v16 875; RV32-NEXT: vmul.vv v8, v8, v12 876; RV32-NEXT: li a0, 56 877; RV32-NEXT: vsrl.vx v8, v8, a0 878; RV32-NEXT: ret 879; 880; RV64-LABEL: ctpop_nxv4i64: 881; RV64: # %bb.0: 882; RV64-NEXT: lui a0, 349525 883; RV64-NEXT: lui a1, 209715 884; RV64-NEXT: lui a2, 61681 885; RV64-NEXT: lui a3, 4112 886; RV64-NEXT: addiw a0, a0, 1365 887; RV64-NEXT: addiw a1, a1, 819 888; RV64-NEXT: addiw a2, a2, -241 889; RV64-NEXT: addiw a3, a3, 257 890; RV64-NEXT: slli a4, a0, 32 891; RV64-NEXT: add a0, a0, a4 892; RV64-NEXT: slli a4, a1, 32 893; RV64-NEXT: add a1, a1, a4 894; RV64-NEXT: slli a4, a2, 32 895; RV64-NEXT: add a2, a2, a4 896; RV64-NEXT: slli a4, a3, 32 897; RV64-NEXT: add a3, a3, a4 898; RV64-NEXT: vsetvli a4, zero, e64, m4, ta, ma 899; RV64-NEXT: vsrl.vi v12, v8, 1 900; RV64-NEXT: vand.vx v12, v12, a0 901; RV64-NEXT: vsub.vv v8, v8, v12 902; RV64-NEXT: vand.vx v12, v8, a1 903; RV64-NEXT: 
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v12, v8
; RV64-NEXT:    vsrl.vi v12, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v12
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv4i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64> %va)
  ret <vscale x 4 x i64> %a
}
declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)

; Without Zvbb the i64 popcount is lowered to the classic bit-twiddling
; sequence (pairwise shift/mask/add, then a multiply by 0x0101...01 and a
; shift right by 56); RV32 must splat the 64-bit masks via e32 vmv.v.x,
; while RV64 materializes them in scalar registers. With Zvbb it is a
; single vcpop.v.
define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
; RV32-LABEL: ctpop_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vi v16, v8, 1
; RV32-NEXT:    lui a0, 349525
; RV32-NEXT:    addi a0, a0, 1365
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    lui a0, 209715
; RV32-NEXT:    addi a0, a0, 819
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v24, v16, v24
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 61681
; RV32-NEXT:    addi a0, a0, -241
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsub.vv v8, v8, v24
; RV32-NEXT:    vand.vv v24, v8, v16
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v16, a0
; RV32-NEXT:    lui a0, 4112
; RV32-NEXT:    addi a0, a0, 257
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vadd.vv v8, v24, v8
; RV32-NEXT:    vsrl.vi v24, v8, 4
; RV32-NEXT:    vadd.vv v8, v8, v24
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.x v24, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vand.vv v8, v8, v16
; RV32-NEXT:    vmul.vv v8, v8, v24
; RV32-NEXT:    li a0, 56
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: ctpop_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, 349525
; RV64-NEXT:    lui a1, 209715
; RV64-NEXT:    lui a2, 61681
; RV64-NEXT:    lui a3, 4112
; RV64-NEXT:    addiw a0, a0, 1365
; RV64-NEXT:    addiw a1, a1, 819
; RV64-NEXT:    addiw a2, a2, -241
; RV64-NEXT:    addiw a3, a3, 257
; RV64-NEXT:    slli a4, a0, 32
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    slli a4, a1, 32
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    slli a4, a2, 32
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    slli a4, a3, 32
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
; RV64-NEXT:    vsrl.vi v16, v8, 1
; RV64-NEXT:    vand.vx v16, v16, a0
; RV64-NEXT:    vsub.vv v8, v8, v16
; RV64-NEXT:    vand.vx v16, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vand.vx v8, v8, a1
; RV64-NEXT:    vadd.vv v8, v16, v8
; RV64-NEXT:    vsrl.vi v16, v8, 4
; RV64-NEXT:    vadd.vv v8, v8, v16
; RV64-NEXT:    vand.vx v8, v8, a2
; RV64-NEXT:    vmul.vx v8, v8, a3
; RV64-NEXT:    li a0, 56
; RV64-NEXT:    vsrl.vx v8, v8, a0
; RV64-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  ret <vscale x 8 x i64> %a
}

; We always emit vcpop.v for the scalable vector
define <vscale x 8 x i1> @ctpop_nxv8i64_ult_two(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ult_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmseq.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ult_two:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsleu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ult <vscale x 8 x i64> %a, splat (i64 2)
  ret <vscale x 8 x i1> %cmp
}

define <vscale x 8 x i1> @ctpop_nxv8i64_ugt_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vand.vv v8, v8, v16
; CHECK-NEXT:    vmsne.vi v0, v8, 0
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ugt_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsgtu.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ugt <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}

define <vscale x 8 x i1> @ctpop_nxv8i64_eq_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_eq_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsltu.vv v0, v16, v8
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_eq_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmseq.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp eq <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}

define <vscale x 8 x i1> @ctpop_nxv8i64_ne_one(<vscale x 8 x i64> %va) {
; CHECK-LABEL: ctpop_nxv8i64_ne_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vadd.vi v16, v8, -1
; CHECK-NEXT:    vxor.vv v8, v8, v16
; CHECK-NEXT:    vmsleu.vv v0, v8, v16
; CHECK-NEXT:    ret
;
; CHECK-ZVBB-LABEL: ctpop_nxv8i64_ne_one:
; CHECK-ZVBB:       # %bb.0:
; CHECK-ZVBB-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-ZVBB-NEXT:    vcpop.v v8, v8
; CHECK-ZVBB-NEXT:    vmsne.vi v0, v8, 1
; CHECK-ZVBB-NEXT:    ret
  %a = call <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64> %va)
  %cmp = icmp ne <vscale x 8 x i64> %a, splat (i64 1)
  ret <vscale x 8 x i1> %cmp
}

declare <vscale x 8 x i64> @llvm.ctpop.nxv8i64(<vscale x 8 x i64>)