1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -mattr=+v,+m -target-abi=ilp32d \ 3; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 4; RUN: llc -mtriple=riscv64 -mattr=+v,+m -target-abi=lp64d \ 5; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 6 7declare <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8>, <2 x i1>, i32) 8 9define <2 x i8> @vp_ctpop_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { 10; CHECK-LABEL: vp_ctpop_v2i8: 11; CHECK: # %bb.0: 12; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 13; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 14; CHECK-NEXT: li a0, 85 15; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 16; CHECK-NEXT: li a0, 51 17; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 18; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 19; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 20; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 21; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 22; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 23; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 24; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 25; CHECK-NEXT: ret 26 %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl) 27 ret <2 x i8> %v 28} 29 30define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { 31; CHECK-LABEL: vp_ctpop_v2i8_unmasked: 32; CHECK: # %bb.0: 33; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma 34; CHECK-NEXT: vsrl.vi v9, v8, 1 35; CHECK-NEXT: li a0, 85 36; CHECK-NEXT: vand.vx v9, v9, a0 37; CHECK-NEXT: li a0, 51 38; CHECK-NEXT: vsub.vv v8, v8, v9 39; CHECK-NEXT: vand.vx v9, v8, a0 40; CHECK-NEXT: vsrl.vi v8, v8, 2 41; CHECK-NEXT: vand.vx v8, v8, a0 42; CHECK-NEXT: vadd.vv v8, v9, v8 43; CHECK-NEXT: vsrl.vi v9, v8, 4 44; CHECK-NEXT: vadd.vv v8, v8, v9 45; CHECK-NEXT: vand.vi v8, v8, 15 46; CHECK-NEXT: ret 47 %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl) 48 ret <2 x i8> %v 49} 50 51declare <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8>, <4 x i1>, i32) 52 53define <4 x i8> @vp_ctpop_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { 54; CHECK-LABEL: vp_ctpop_v4i8: 55; CHECK: # %bb.0: 56; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 57; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 58; CHECK-NEXT: li a0, 85 59; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 60; CHECK-NEXT: li a0, 51 61; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 62; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 63; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 64; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 65; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 66; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 67; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 68; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 69; CHECK-NEXT: ret 70 %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl) 71 ret <4 x i8> %v 72} 73 74define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { 75; CHECK-LABEL: vp_ctpop_v4i8_unmasked: 76; CHECK: # %bb.0: 77; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 78; CHECK-NEXT: vsrl.vi v9, v8, 1 79; CHECK-NEXT: li a0, 85 80; CHECK-NEXT: vand.vx v9, v9, a0 81; CHECK-NEXT: li a0, 51 82; CHECK-NEXT: vsub.vv v8, v8, v9 83; CHECK-NEXT: vand.vx v9, v8, a0 84; CHECK-NEXT: vsrl.vi v8, v8, 2 85; CHECK-NEXT: vand.vx v8, v8, a0 86; CHECK-NEXT: vadd.vv v8, v9, v8 87; CHECK-NEXT: vsrl.vi v9, v8, 4 88; CHECK-NEXT: vadd.vv v8, v8, v9 89; CHECK-NEXT: vand.vi v8, v8, 15 90; CHECK-NEXT: ret 91 %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) 92 ret <4 x i8> %v 93} 94 95declare <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8>, <8 x i1>, i32) 96 97define <8 x 
i8> @vp_ctpop_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { 98; CHECK-LABEL: vp_ctpop_v8i8: 99; CHECK: # %bb.0: 100; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 101; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 102; CHECK-NEXT: li a0, 85 103; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 104; CHECK-NEXT: li a0, 51 105; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 106; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 107; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 108; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 109; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 110; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 111; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 112; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 113; CHECK-NEXT: ret 114 %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl) 115 ret <8 x i8> %v 116} 117 118define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { 119; CHECK-LABEL: vp_ctpop_v8i8_unmasked: 120; CHECK: # %bb.0: 121; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 122; CHECK-NEXT: vsrl.vi v9, v8, 1 123; CHECK-NEXT: li a0, 85 124; CHECK-NEXT: vand.vx v9, v9, a0 125; CHECK-NEXT: li a0, 51 126; CHECK-NEXT: vsub.vv v8, v8, v9 127; CHECK-NEXT: vand.vx v9, v8, a0 128; CHECK-NEXT: vsrl.vi v8, v8, 2 129; CHECK-NEXT: vand.vx v8, v8, a0 130; CHECK-NEXT: vadd.vv v8, v9, v8 131; CHECK-NEXT: vsrl.vi v9, v8, 4 132; CHECK-NEXT: vadd.vv v8, v8, v9 133; CHECK-NEXT: vand.vi v8, v8, 15 134; CHECK-NEXT: ret 135 %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl) 136 ret <8 x i8> %v 137} 138 139declare <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8>, <16 x i1>, i32) 140 141define <16 x i8> @vp_ctpop_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { 142; CHECK-LABEL: vp_ctpop_v16i8: 143; CHECK: # %bb.0: 144; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 145; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 146; CHECK-NEXT: li a0, 85 147; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 148; CHECK-NEXT: li a0, 51 149; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 150; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 151; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 152; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 153; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 154; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 155; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 156; CHECK-NEXT: vand.vi v8, v8, 15, v0.t 157; CHECK-NEXT: ret 158 %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl) 159 ret <16 x i8> %v 160} 161 162define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { 163; CHECK-LABEL: vp_ctpop_v16i8_unmasked: 164; CHECK: # %bb.0: 165; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma 166; CHECK-NEXT: vsrl.vi v9, v8, 1 167; CHECK-NEXT: li a0, 85 168; CHECK-NEXT: vand.vx v9, v9, a0 169; CHECK-NEXT: li a0, 51 170; CHECK-NEXT: vsub.vv v8, v8, v9 171; CHECK-NEXT: vand.vx v9, v8, a0 172; CHECK-NEXT: vsrl.vi v8, v8, 2 173; CHECK-NEXT: vand.vx v8, v8, a0 174; CHECK-NEXT: vadd.vv v8, v9, v8 175; CHECK-NEXT: vsrl.vi v9, v8, 4 176; CHECK-NEXT: vadd.vv v8, v8, v9 177; CHECK-NEXT: vand.vi v8, v8, 15 178; CHECK-NEXT: ret 179 %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl) 180 ret <16 x i8> %v 181} 182 183declare <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16>, <2 x i1>, i32) 184 185define <2 x i16> @vp_ctpop_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { 186; CHECK-LABEL: vp_ctpop_v2i16: 187; CHECK: # %bb.0: 188; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 189; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 190; CHECK-NEXT: lui a0, 5 191; CHECK-NEXT: addi a0, a0, 1365 192; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 193; 
CHECK-NEXT: lui a0, 3 194; CHECK-NEXT: addi a0, a0, 819 195; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 196; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 197; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 198; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 199; CHECK-NEXT: lui a0, 1 200; CHECK-NEXT: addi a0, a0, -241 201; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 202; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 203; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 204; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 205; CHECK-NEXT: li a0, 257 206; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 207; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 208; CHECK-NEXT: ret 209 %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) 210 ret <2 x i16> %v 211} 212 213define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { 214; CHECK-LABEL: vp_ctpop_v2i16_unmasked: 215; CHECK: # %bb.0: 216; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma 217; CHECK-NEXT: vsrl.vi v9, v8, 1 218; CHECK-NEXT: lui a0, 5 219; CHECK-NEXT: addi a0, a0, 1365 220; CHECK-NEXT: vand.vx v9, v9, a0 221; CHECK-NEXT: lui a0, 3 222; CHECK-NEXT: addi a0, a0, 819 223; CHECK-NEXT: vsub.vv v8, v8, v9 224; CHECK-NEXT: vand.vx v9, v8, a0 225; CHECK-NEXT: vsrl.vi v8, v8, 2 226; CHECK-NEXT: vand.vx v8, v8, a0 227; CHECK-NEXT: lui a0, 1 228; CHECK-NEXT: addi a0, a0, -241 229; CHECK-NEXT: vadd.vv v8, v9, v8 230; CHECK-NEXT: vsrl.vi v9, v8, 4 231; CHECK-NEXT: vadd.vv v8, v8, v9 232; CHECK-NEXT: vand.vx v8, v8, a0 233; CHECK-NEXT: li a0, 257 234; CHECK-NEXT: vmul.vx v8, v8, a0 235; CHECK-NEXT: vsrl.vi v8, v8, 8 236; CHECK-NEXT: ret 237 %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) 238 ret <2 x i16> %v 239} 240 241declare <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16>, <4 x i1>, i32) 242 243define <4 x i16> @vp_ctpop_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { 244; CHECK-LABEL: vp_ctpop_v4i16: 245; CHECK: # %bb.0: 246; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 247; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 248; CHECK-NEXT: lui a0, 5 249; CHECK-NEXT: addi a0, a0, 1365 250; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 251; CHECK-NEXT: lui a0, 3 252; CHECK-NEXT: addi a0, a0, 819 253; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 254; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 255; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 256; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 257; CHECK-NEXT: lui a0, 1 258; CHECK-NEXT: addi a0, a0, -241 259; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 260; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 261; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 262; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 263; CHECK-NEXT: li a0, 257 264; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 265; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 266; CHECK-NEXT: ret 267 %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) 268 ret <4 x i16> %v 269} 270 271define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { 272; CHECK-LABEL: vp_ctpop_v4i16_unmasked: 273; CHECK: # %bb.0: 274; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma 275; CHECK-NEXT: vsrl.vi v9, v8, 1 276; CHECK-NEXT: lui a0, 5 277; CHECK-NEXT: addi a0, a0, 1365 278; CHECK-NEXT: vand.vx v9, v9, a0 279; CHECK-NEXT: lui a0, 3 280; CHECK-NEXT: addi a0, a0, 819 281; CHECK-NEXT: vsub.vv v8, v8, v9 282; CHECK-NEXT: vand.vx v9, v8, a0 283; CHECK-NEXT: vsrl.vi v8, v8, 2 284; CHECK-NEXT: vand.vx v8, v8, a0 285; CHECK-NEXT: lui a0, 1 286; CHECK-NEXT: addi a0, a0, -241 287; CHECK-NEXT: vadd.vv v8, v9, v8 288; CHECK-NEXT: vsrl.vi v9, v8, 4 289; CHECK-NEXT: vadd.vv v8, v8, v9 290; CHECK-NEXT: vand.vx v8, v8, a0 291; CHECK-NEXT: li a0, 
257 292; CHECK-NEXT: vmul.vx v8, v8, a0 293; CHECK-NEXT: vsrl.vi v8, v8, 8 294; CHECK-NEXT: ret 295 %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) 296 ret <4 x i16> %v 297} 298 299declare <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16>, <8 x i1>, i32) 300 301define <8 x i16> @vp_ctpop_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { 302; CHECK-LABEL: vp_ctpop_v8i16: 303; CHECK: # %bb.0: 304; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 305; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 306; CHECK-NEXT: lui a0, 5 307; CHECK-NEXT: addi a0, a0, 1365 308; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 309; CHECK-NEXT: lui a0, 3 310; CHECK-NEXT: addi a0, a0, 819 311; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 312; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 313; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 314; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 315; CHECK-NEXT: lui a0, 1 316; CHECK-NEXT: addi a0, a0, -241 317; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 318; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 319; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 320; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 321; CHECK-NEXT: li a0, 257 322; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 323; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 324; CHECK-NEXT: ret 325 %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) 326 ret <8 x i16> %v 327} 328 329define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { 330; CHECK-LABEL: vp_ctpop_v8i16_unmasked: 331; CHECK: # %bb.0: 332; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma 333; CHECK-NEXT: vsrl.vi v9, v8, 1 334; CHECK-NEXT: lui a0, 5 335; CHECK-NEXT: addi a0, a0, 1365 336; CHECK-NEXT: vand.vx v9, v9, a0 337; CHECK-NEXT: lui a0, 3 338; CHECK-NEXT: addi a0, a0, 819 339; CHECK-NEXT: vsub.vv v8, v8, v9 340; CHECK-NEXT: vand.vx v9, v8, a0 341; CHECK-NEXT: vsrl.vi v8, v8, 2 342; CHECK-NEXT: vand.vx v8, v8, a0 343; CHECK-NEXT: lui a0, 1 344; CHECK-NEXT: addi a0, a0, -241 345; CHECK-NEXT: vadd.vv v8, v9, v8 346; CHECK-NEXT: vsrl.vi v9, v8, 4 347; CHECK-NEXT: vadd.vv v8, v8, v9 348; CHECK-NEXT: vand.vx v8, v8, a0 349; CHECK-NEXT: li a0, 257 350; CHECK-NEXT: vmul.vx v8, v8, a0 351; CHECK-NEXT: vsrl.vi v8, v8, 8 352; CHECK-NEXT: ret 353 %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl) 354 ret <8 x i16> %v 355} 356 357declare <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16>, <16 x i1>, i32) 358 359define <16 x i16> @vp_ctpop_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { 360; CHECK-LABEL: vp_ctpop_v16i16: 361; CHECK: # %bb.0: 362; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 363; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 364; CHECK-NEXT: lui a0, 5 365; CHECK-NEXT: addi a0, a0, 1365 366; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 367; CHECK-NEXT: lui a0, 3 368; CHECK-NEXT: addi a0, a0, 819 369; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 370; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 371; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 372; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 373; CHECK-NEXT: lui a0, 1 374; CHECK-NEXT: addi a0, a0, -241 375; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 376; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 377; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 378; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 379; CHECK-NEXT: li a0, 257 380; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 381; CHECK-NEXT: vsrl.vi v8, v8, 8, v0.t 382; CHECK-NEXT: ret 383 %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) 384 ret <16 x i16> %v 385} 386 387define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { 388; CHECK-LABEL: 
vp_ctpop_v16i16_unmasked: 389; CHECK: # %bb.0: 390; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma 391; CHECK-NEXT: vsrl.vi v10, v8, 1 392; CHECK-NEXT: lui a0, 5 393; CHECK-NEXT: addi a0, a0, 1365 394; CHECK-NEXT: vand.vx v10, v10, a0 395; CHECK-NEXT: lui a0, 3 396; CHECK-NEXT: addi a0, a0, 819 397; CHECK-NEXT: vsub.vv v8, v8, v10 398; CHECK-NEXT: vand.vx v10, v8, a0 399; CHECK-NEXT: vsrl.vi v8, v8, 2 400; CHECK-NEXT: vand.vx v8, v8, a0 401; CHECK-NEXT: lui a0, 1 402; CHECK-NEXT: addi a0, a0, -241 403; CHECK-NEXT: vadd.vv v8, v10, v8 404; CHECK-NEXT: vsrl.vi v10, v8, 4 405; CHECK-NEXT: vadd.vv v8, v8, v10 406; CHECK-NEXT: vand.vx v8, v8, a0 407; CHECK-NEXT: li a0, 257 408; CHECK-NEXT: vmul.vx v8, v8, a0 409; CHECK-NEXT: vsrl.vi v8, v8, 8 410; CHECK-NEXT: ret 411 %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl) 412 ret <16 x i16> %v 413} 414 415declare <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32>, <2 x i1>, i32) 416 417define <2 x i32> @vp_ctpop_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { 418; CHECK-LABEL: vp_ctpop_v2i32: 419; CHECK: # %bb.0: 420; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 421; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 422; CHECK-NEXT: lui a0, 349525 423; CHECK-NEXT: addi a0, a0, 1365 424; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 425; CHECK-NEXT: lui a0, 209715 426; CHECK-NEXT: addi a0, a0, 819 427; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 428; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 429; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 430; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 431; CHECK-NEXT: lui a0, 61681 432; CHECK-NEXT: addi a0, a0, -241 433; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 434; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 435; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 436; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 437; CHECK-NEXT: lui a0, 4112 438; CHECK-NEXT: addi a0, a0, 257 439; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 440; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 441; CHECK-NEXT: ret 442 %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) 443 ret <2 x i32> %v 444} 445 446define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { 447; CHECK-LABEL: vp_ctpop_v2i32_unmasked: 448; CHECK: # %bb.0: 449; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma 450; CHECK-NEXT: vsrl.vi v9, v8, 1 451; CHECK-NEXT: lui a0, 349525 452; CHECK-NEXT: addi a0, a0, 1365 453; CHECK-NEXT: vand.vx v9, v9, a0 454; CHECK-NEXT: lui a0, 209715 455; CHECK-NEXT: addi a0, a0, 819 456; CHECK-NEXT: vsub.vv v8, v8, v9 457; CHECK-NEXT: vand.vx v9, v8, a0 458; CHECK-NEXT: vsrl.vi v8, v8, 2 459; CHECK-NEXT: vand.vx v8, v8, a0 460; CHECK-NEXT: lui a0, 61681 461; CHECK-NEXT: addi a0, a0, -241 462; CHECK-NEXT: vadd.vv v8, v9, v8 463; CHECK-NEXT: vsrl.vi v9, v8, 4 464; CHECK-NEXT: vadd.vv v8, v8, v9 465; CHECK-NEXT: vand.vx v8, v8, a0 466; CHECK-NEXT: lui a0, 4112 467; CHECK-NEXT: addi a0, a0, 257 468; CHECK-NEXT: vmul.vx v8, v8, a0 469; CHECK-NEXT: vsrl.vi v8, v8, 24 470; CHECK-NEXT: ret 471 %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl) 472 ret <2 x i32> %v 473} 474 475declare <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32>, <4 x i1>, i32) 476 477define <4 x i32> @vp_ctpop_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { 478; CHECK-LABEL: vp_ctpop_v4i32: 479; CHECK: # %bb.0: 480; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 481; CHECK-NEXT: vsrl.vi v9, v8, 1, v0.t 482; CHECK-NEXT: lui a0, 349525 483; CHECK-NEXT: addi a0, a0, 1365 484; CHECK-NEXT: vand.vx v9, v9, a0, v0.t 485; CHECK-NEXT: lui a0, 209715 486; 
CHECK-NEXT: addi a0, a0, 819 487; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t 488; CHECK-NEXT: vand.vx v9, v8, a0, v0.t 489; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 490; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 491; CHECK-NEXT: lui a0, 61681 492; CHECK-NEXT: addi a0, a0, -241 493; CHECK-NEXT: vadd.vv v8, v9, v8, v0.t 494; CHECK-NEXT: vsrl.vi v9, v8, 4, v0.t 495; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t 496; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 497; CHECK-NEXT: lui a0, 4112 498; CHECK-NEXT: addi a0, a0, 257 499; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 500; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 501; CHECK-NEXT: ret 502 %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) 503 ret <4 x i32> %v 504} 505 506define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { 507; CHECK-LABEL: vp_ctpop_v4i32_unmasked: 508; CHECK: # %bb.0: 509; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 510; CHECK-NEXT: vsrl.vi v9, v8, 1 511; CHECK-NEXT: lui a0, 349525 512; CHECK-NEXT: addi a0, a0, 1365 513; CHECK-NEXT: vand.vx v9, v9, a0 514; CHECK-NEXT: lui a0, 209715 515; CHECK-NEXT: addi a0, a0, 819 516; CHECK-NEXT: vsub.vv v8, v8, v9 517; CHECK-NEXT: vand.vx v9, v8, a0 518; CHECK-NEXT: vsrl.vi v8, v8, 2 519; CHECK-NEXT: vand.vx v8, v8, a0 520; CHECK-NEXT: lui a0, 61681 521; CHECK-NEXT: addi a0, a0, -241 522; CHECK-NEXT: vadd.vv v8, v9, v8 523; CHECK-NEXT: vsrl.vi v9, v8, 4 524; CHECK-NEXT: vadd.vv v8, v8, v9 525; CHECK-NEXT: vand.vx v8, v8, a0 526; CHECK-NEXT: lui a0, 4112 527; CHECK-NEXT: addi a0, a0, 257 528; CHECK-NEXT: vmul.vx v8, v8, a0 529; CHECK-NEXT: vsrl.vi v8, v8, 24 530; CHECK-NEXT: ret 531 %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) 532 ret <4 x i32> %v 533} 534 535declare <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32>, <8 x i1>, i32) 536 537define <8 x i32> @vp_ctpop_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { 538; CHECK-LABEL: vp_ctpop_v8i32: 539; CHECK: # %bb.0: 540; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 541; CHECK-NEXT: vsrl.vi v10, v8, 1, v0.t 542; CHECK-NEXT: lui a0, 349525 543; CHECK-NEXT: addi a0, a0, 1365 544; CHECK-NEXT: vand.vx v10, v10, a0, v0.t 545; CHECK-NEXT: lui a0, 209715 546; CHECK-NEXT: addi a0, a0, 819 547; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t 548; CHECK-NEXT: vand.vx v10, v8, a0, v0.t 549; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 550; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 551; CHECK-NEXT: lui a0, 61681 552; CHECK-NEXT: addi a0, a0, -241 553; CHECK-NEXT: vadd.vv v8, v10, v8, v0.t 554; CHECK-NEXT: vsrl.vi v10, v8, 4, v0.t 555; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t 556; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 557; CHECK-NEXT: lui a0, 4112 558; CHECK-NEXT: addi a0, a0, 257 559; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 560; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 561; CHECK-NEXT: ret 562 %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) 563 ret <8 x i32> %v 564} 565 566define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { 567; CHECK-LABEL: vp_ctpop_v8i32_unmasked: 568; CHECK: # %bb.0: 569; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma 570; CHECK-NEXT: vsrl.vi v10, v8, 1 571; CHECK-NEXT: lui a0, 349525 572; CHECK-NEXT: addi a0, a0, 1365 573; CHECK-NEXT: vand.vx v10, v10, a0 574; CHECK-NEXT: lui a0, 209715 575; CHECK-NEXT: addi a0, a0, 819 576; CHECK-NEXT: vsub.vv v8, v8, v10 577; CHECK-NEXT: vand.vx v10, v8, a0 578; CHECK-NEXT: vsrl.vi v8, v8, 2 579; CHECK-NEXT: vand.vx v8, v8, a0 580; CHECK-NEXT: lui a0, 61681 581; CHECK-NEXT: addi a0, a0, -241 582; CHECK-NEXT: vadd.vv v8, 
v10, v8 583; CHECK-NEXT: vsrl.vi v10, v8, 4 584; CHECK-NEXT: vadd.vv v8, v8, v10 585; CHECK-NEXT: vand.vx v8, v8, a0 586; CHECK-NEXT: lui a0, 4112 587; CHECK-NEXT: addi a0, a0, 257 588; CHECK-NEXT: vmul.vx v8, v8, a0 589; CHECK-NEXT: vsrl.vi v8, v8, 24 590; CHECK-NEXT: ret 591 %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl) 592 ret <8 x i32> %v 593} 594 595declare <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32>, <16 x i1>, i32) 596 597define <16 x i32> @vp_ctpop_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { 598; CHECK-LABEL: vp_ctpop_v16i32: 599; CHECK: # %bb.0: 600; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 601; CHECK-NEXT: vsrl.vi v12, v8, 1, v0.t 602; CHECK-NEXT: lui a0, 349525 603; CHECK-NEXT: addi a0, a0, 1365 604; CHECK-NEXT: vand.vx v12, v12, a0, v0.t 605; CHECK-NEXT: lui a0, 209715 606; CHECK-NEXT: addi a0, a0, 819 607; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t 608; CHECK-NEXT: vand.vx v12, v8, a0, v0.t 609; CHECK-NEXT: vsrl.vi v8, v8, 2, v0.t 610; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 611; CHECK-NEXT: lui a0, 61681 612; CHECK-NEXT: addi a0, a0, -241 613; CHECK-NEXT: vadd.vv v8, v12, v8, v0.t 614; CHECK-NEXT: vsrl.vi v12, v8, 4, v0.t 615; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t 616; CHECK-NEXT: vand.vx v8, v8, a0, v0.t 617; CHECK-NEXT: lui a0, 4112 618; CHECK-NEXT: addi a0, a0, 257 619; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t 620; CHECK-NEXT: vsrl.vi v8, v8, 24, v0.t 621; CHECK-NEXT: ret 622 %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) 623 ret <16 x i32> %v 624} 625 626define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { 627; CHECK-LABEL: vp_ctpop_v16i32_unmasked: 628; CHECK: # %bb.0: 629; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma 630; CHECK-NEXT: vsrl.vi v12, v8, 1 631; CHECK-NEXT: lui a0, 349525 632; CHECK-NEXT: addi a0, a0, 1365 633; CHECK-NEXT: vand.vx v12, v12, a0 634; CHECK-NEXT: lui a0, 209715 635; CHECK-NEXT: addi a0, a0, 819 636; CHECK-NEXT: vsub.vv v8, v8, v12 637; CHECK-NEXT: vand.vx v12, v8, a0 638; CHECK-NEXT: vsrl.vi v8, v8, 2 639; CHECK-NEXT: vand.vx v8, v8, a0 640; CHECK-NEXT: lui a0, 61681 641; CHECK-NEXT: addi a0, a0, -241 642; CHECK-NEXT: vadd.vv v8, v12, v8 643; CHECK-NEXT: vsrl.vi v12, v8, 4 644; CHECK-NEXT: vadd.vv v8, v8, v12 645; CHECK-NEXT: vand.vx v8, v8, a0 646; CHECK-NEXT: lui a0, 4112 647; CHECK-NEXT: addi a0, a0, 257 648; CHECK-NEXT: vmul.vx v8, v8, a0 649; CHECK-NEXT: vsrl.vi v8, v8, 24 650; CHECK-NEXT: ret 651 %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl) 652 ret <16 x i32> %v 653} 654 655declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32) 656 657define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { 658; RV32-LABEL: vp_ctpop_v2i64: 659; RV32: # %bb.0: 660; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 661; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t 662; RV32-NEXT: lui a1, 349525 663; RV32-NEXT: addi a1, a1, 1365 664; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 665; RV32-NEXT: vmv.v.x v10, a1 666; RV32-NEXT: lui a1, 209715 667; RV32-NEXT: addi a1, a1, 819 668; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 669; RV32-NEXT: vand.vv v9, v9, v10, v0.t 670; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 671; RV32-NEXT: vmv.v.x v10, a1 672; RV32-NEXT: lui a1, 61681 673; RV32-NEXT: addi a1, a1, -241 674; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 675; RV32-NEXT: vsub.vv v8, v8, v9, v0.t 676; RV32-NEXT: vand.vv v9, v8, v10, v0.t 677; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 
678; RV32-NEXT: vand.vv v8, v8, v10, v0.t 679; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 680; RV32-NEXT: vmv.v.x v10, a1 681; RV32-NEXT: lui a1, 4112 682; RV32-NEXT: addi a1, a1, 257 683; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 684; RV32-NEXT: vadd.vv v8, v9, v8, v0.t 685; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t 686; RV32-NEXT: vadd.vv v8, v8, v9, v0.t 687; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 688; RV32-NEXT: vmv.v.x v9, a1 689; RV32-NEXT: li a1, 56 690; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 691; RV32-NEXT: vand.vv v8, v8, v10, v0.t 692; RV32-NEXT: vmul.vv v8, v8, v9, v0.t 693; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 694; RV32-NEXT: ret 695; 696; RV64-LABEL: vp_ctpop_v2i64: 697; RV64: # %bb.0: 698; RV64-NEXT: lui a1, 349525 699; RV64-NEXT: lui a2, 209715 700; RV64-NEXT: lui a3, 61681 701; RV64-NEXT: lui a4, 4112 702; RV64-NEXT: addiw a1, a1, 1365 703; RV64-NEXT: addiw a2, a2, 819 704; RV64-NEXT: addiw a3, a3, -241 705; RV64-NEXT: addiw a4, a4, 257 706; RV64-NEXT: slli a5, a1, 32 707; RV64-NEXT: add a1, a1, a5 708; RV64-NEXT: slli a5, a2, 32 709; RV64-NEXT: add a2, a2, a5 710; RV64-NEXT: slli a5, a3, 32 711; RV64-NEXT: add a3, a3, a5 712; RV64-NEXT: slli a5, a4, 32 713; RV64-NEXT: add a4, a4, a5 714; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 715; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t 716; RV64-NEXT: vand.vx v9, v9, a1, v0.t 717; RV64-NEXT: vsub.vv v8, v8, v9, v0.t 718; RV64-NEXT: vand.vx v9, v8, a2, v0.t 719; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 720; RV64-NEXT: vand.vx v8, v8, a2, v0.t 721; RV64-NEXT: vadd.vv v8, v9, v8, v0.t 722; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t 723; RV64-NEXT: vadd.vv v8, v8, v9, v0.t 724; RV64-NEXT: vand.vx v8, v8, a3, v0.t 725; RV64-NEXT: li a0, 56 726; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 727; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 728; RV64-NEXT: ret 729 %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl) 730 ret <2 x i64> %v 731} 732 733define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { 734; RV32-LABEL: vp_ctpop_v2i64_unmasked: 735; RV32: # %bb.0: 736; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 737; RV32-NEXT: vsrl.vi v9, v8, 1 738; RV32-NEXT: lui a1, 349525 739; RV32-NEXT: addi a1, a1, 1365 740; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 741; RV32-NEXT: vmv.v.x v10, a1 742; RV32-NEXT: lui a1, 209715 743; RV32-NEXT: addi a1, a1, 819 744; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 745; RV32-NEXT: vand.vv v9, v9, v10 746; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 747; RV32-NEXT: vmv.v.x v10, a1 748; RV32-NEXT: lui a1, 61681 749; RV32-NEXT: addi a1, a1, -241 750; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 751; RV32-NEXT: vsub.vv v8, v8, v9 752; RV32-NEXT: vand.vv v9, v8, v10 753; RV32-NEXT: vsrl.vi v8, v8, 2 754; RV32-NEXT: vand.vv v8, v8, v10 755; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 756; RV32-NEXT: vmv.v.x v10, a1 757; RV32-NEXT: lui a1, 4112 758; RV32-NEXT: addi a1, a1, 257 759; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 760; RV32-NEXT: vadd.vv v8, v9, v8 761; RV32-NEXT: vsrl.vi v9, v8, 4 762; RV32-NEXT: vadd.vv v8, v8, v9 763; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma 764; RV32-NEXT: vmv.v.x v9, a1 765; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma 766; RV32-NEXT: vand.vv v8, v8, v10 767; RV32-NEXT: vmul.vv v8, v8, v9 768; RV32-NEXT: li a0, 56 769; RV32-NEXT: vsrl.vx v8, v8, a0 770; RV32-NEXT: ret 771; 772; RV64-LABEL: vp_ctpop_v2i64_unmasked: 773; RV64: # %bb.0: 774; RV64-NEXT: lui a1, 349525 775; RV64-NEXT: lui a2, 209715 776; RV64-NEXT: lui a3, 61681 777; RV64-NEXT: lui a4, 4112 
778; RV64-NEXT: addiw a1, a1, 1365 779; RV64-NEXT: addiw a2, a2, 819 780; RV64-NEXT: addiw a3, a3, -241 781; RV64-NEXT: addiw a4, a4, 257 782; RV64-NEXT: slli a5, a1, 32 783; RV64-NEXT: add a1, a1, a5 784; RV64-NEXT: slli a5, a2, 32 785; RV64-NEXT: add a2, a2, a5 786; RV64-NEXT: slli a5, a3, 32 787; RV64-NEXT: add a3, a3, a5 788; RV64-NEXT: slli a5, a4, 32 789; RV64-NEXT: add a4, a4, a5 790; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma 791; RV64-NEXT: vsrl.vi v9, v8, 1 792; RV64-NEXT: vand.vx v9, v9, a1 793; RV64-NEXT: vsub.vv v8, v8, v9 794; RV64-NEXT: vand.vx v9, v8, a2 795; RV64-NEXT: vsrl.vi v8, v8, 2 796; RV64-NEXT: vand.vx v8, v8, a2 797; RV64-NEXT: vadd.vv v8, v9, v8 798; RV64-NEXT: vsrl.vi v9, v8, 4 799; RV64-NEXT: vadd.vv v8, v8, v9 800; RV64-NEXT: vand.vx v8, v8, a3 801; RV64-NEXT: vmul.vx v8, v8, a4 802; RV64-NEXT: li a0, 56 803; RV64-NEXT: vsrl.vx v8, v8, a0 804; RV64-NEXT: ret 805 %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl) 806 ret <2 x i64> %v 807} 808 809declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32) 810 811define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { 812; RV32-LABEL: vp_ctpop_v4i64: 813; RV32: # %bb.0: 814; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 815; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t 816; RV32-NEXT: lui a1, 349525 817; RV32-NEXT: addi a1, a1, 1365 818; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 819; RV32-NEXT: vmv.v.x v12, a1 820; RV32-NEXT: lui a1, 209715 821; RV32-NEXT: addi a1, a1, 819 822; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 823; RV32-NEXT: vand.vv v10, v10, v12, v0.t 824; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 825; RV32-NEXT: vmv.v.x v12, a1 826; RV32-NEXT: lui a1, 61681 827; RV32-NEXT: addi a1, a1, -241 828; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 829; RV32-NEXT: vsub.vv v8, v8, v10, v0.t 830; RV32-NEXT: vand.vv v10, v8, v12, v0.t 831; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 832; RV32-NEXT: vand.vv v8, v8, v12, v0.t 833; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 834; RV32-NEXT: vmv.v.x v12, a1 835; RV32-NEXT: lui a1, 4112 836; RV32-NEXT: addi a1, a1, 257 837; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 838; RV32-NEXT: vadd.vv v8, v10, v8, v0.t 839; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t 840; RV32-NEXT: vadd.vv v8, v8, v10, v0.t 841; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 842; RV32-NEXT: vmv.v.x v10, a1 843; RV32-NEXT: li a1, 56 844; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 845; RV32-NEXT: vand.vv v8, v8, v12, v0.t 846; RV32-NEXT: vmul.vv v8, v8, v10, v0.t 847; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 848; RV32-NEXT: ret 849; 850; RV64-LABEL: vp_ctpop_v4i64: 851; RV64: # %bb.0: 852; RV64-NEXT: lui a1, 349525 853; RV64-NEXT: lui a2, 209715 854; RV64-NEXT: lui a3, 61681 855; RV64-NEXT: lui a4, 4112 856; RV64-NEXT: addiw a1, a1, 1365 857; RV64-NEXT: addiw a2, a2, 819 858; RV64-NEXT: addiw a3, a3, -241 859; RV64-NEXT: addiw a4, a4, 257 860; RV64-NEXT: slli a5, a1, 32 861; RV64-NEXT: add a1, a1, a5 862; RV64-NEXT: slli a5, a2, 32 863; RV64-NEXT: add a2, a2, a5 864; RV64-NEXT: slli a5, a3, 32 865; RV64-NEXT: add a3, a3, a5 866; RV64-NEXT: slli a5, a4, 32 867; RV64-NEXT: add a4, a4, a5 868; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 869; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t 870; RV64-NEXT: vand.vx v10, v10, a1, v0.t 871; RV64-NEXT: vsub.vv v8, v8, v10, v0.t 872; RV64-NEXT: vand.vx v10, v8, a2, v0.t 873; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 874; RV64-NEXT: vand.vx v8, v8, a2, v0.t 875; RV64-NEXT: vadd.vv v8, v10, v8, v0.t 876; RV64-NEXT: vsrl.vi 
v10, v8, 4, v0.t 877; RV64-NEXT: vadd.vv v8, v8, v10, v0.t 878; RV64-NEXT: vand.vx v8, v8, a3, v0.t 879; RV64-NEXT: li a0, 56 880; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 881; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 882; RV64-NEXT: ret 883 %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) 884 ret <4 x i64> %v 885} 886 887define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { 888; RV32-LABEL: vp_ctpop_v4i64_unmasked: 889; RV32: # %bb.0: 890; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 891; RV32-NEXT: vsrl.vi v10, v8, 1 892; RV32-NEXT: lui a1, 349525 893; RV32-NEXT: addi a1, a1, 1365 894; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 895; RV32-NEXT: vmv.v.x v12, a1 896; RV32-NEXT: lui a1, 209715 897; RV32-NEXT: addi a1, a1, 819 898; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 899; RV32-NEXT: vand.vv v10, v10, v12 900; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 901; RV32-NEXT: vmv.v.x v12, a1 902; RV32-NEXT: lui a1, 61681 903; RV32-NEXT: addi a1, a1, -241 904; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 905; RV32-NEXT: vsub.vv v8, v8, v10 906; RV32-NEXT: vand.vv v10, v8, v12 907; RV32-NEXT: vsrl.vi v8, v8, 2 908; RV32-NEXT: vand.vv v8, v8, v12 909; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 910; RV32-NEXT: vmv.v.x v12, a1 911; RV32-NEXT: lui a1, 4112 912; RV32-NEXT: addi a1, a1, 257 913; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 914; RV32-NEXT: vadd.vv v8, v10, v8 915; RV32-NEXT: vsrl.vi v10, v8, 4 916; RV32-NEXT: vadd.vv v8, v8, v10 917; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma 918; RV32-NEXT: vmv.v.x v10, a1 919; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma 920; RV32-NEXT: vand.vv v8, v8, v12 921; RV32-NEXT: vmul.vv v8, v8, v10 922; RV32-NEXT: li a0, 56 923; RV32-NEXT: vsrl.vx v8, v8, a0 924; RV32-NEXT: ret 925; 926; RV64-LABEL: vp_ctpop_v4i64_unmasked: 927; RV64: # %bb.0: 928; RV64-NEXT: lui a1, 349525 929; RV64-NEXT: lui a2, 209715 930; RV64-NEXT: lui a3, 61681 931; RV64-NEXT: lui a4, 4112 932; RV64-NEXT: addiw a1, a1, 1365 933; RV64-NEXT: addiw a2, a2, 819 934; RV64-NEXT: addiw a3, a3, -241 935; RV64-NEXT: addiw a4, a4, 257 936; RV64-NEXT: slli a5, a1, 32 937; RV64-NEXT: add a1, a1, a5 938; RV64-NEXT: slli a5, a2, 32 939; RV64-NEXT: add a2, a2, a5 940; RV64-NEXT: slli a5, a3, 32 941; RV64-NEXT: add a3, a3, a5 942; RV64-NEXT: slli a5, a4, 32 943; RV64-NEXT: add a4, a4, a5 944; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma 945; RV64-NEXT: vsrl.vi v10, v8, 1 946; RV64-NEXT: vand.vx v10, v10, a1 947; RV64-NEXT: vsub.vv v8, v8, v10 948; RV64-NEXT: vand.vx v10, v8, a2 949; RV64-NEXT: vsrl.vi v8, v8, 2 950; RV64-NEXT: vand.vx v8, v8, a2 951; RV64-NEXT: vadd.vv v8, v10, v8 952; RV64-NEXT: vsrl.vi v10, v8, 4 953; RV64-NEXT: vadd.vv v8, v8, v10 954; RV64-NEXT: vand.vx v8, v8, a3 955; RV64-NEXT: vmul.vx v8, v8, a4 956; RV64-NEXT: li a0, 56 957; RV64-NEXT: vsrl.vx v8, v8, a0 958; RV64-NEXT: ret 959 %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) 960 ret <4 x i64> %v 961} 962 963declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32) 964 965define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { 966; RV32-LABEL: vp_ctpop_v8i64: 967; RV32: # %bb.0: 968; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 969; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t 970; RV32-NEXT: lui a1, 349525 971; RV32-NEXT: addi a1, a1, 1365 972; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 973; RV32-NEXT: vmv.v.x v16, a1 974; RV32-NEXT: lui a1, 209715 975; RV32-NEXT: addi a1, a1, 819 976; RV32-NEXT: vsetvli 
zero, a0, e64, m4, ta, ma 977; RV32-NEXT: vand.vv v16, v12, v16, v0.t 978; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 979; RV32-NEXT: vmv.v.x v12, a1 980; RV32-NEXT: lui a1, 61681 981; RV32-NEXT: addi a1, a1, -241 982; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 983; RV32-NEXT: vsub.vv v8, v8, v16, v0.t 984; RV32-NEXT: vand.vv v16, v8, v12, v0.t 985; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 986; RV32-NEXT: vand.vv v8, v8, v12, v0.t 987; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 988; RV32-NEXT: vmv.v.x v12, a1 989; RV32-NEXT: lui a1, 4112 990; RV32-NEXT: addi a1, a1, 257 991; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 992; RV32-NEXT: vadd.vv v8, v16, v8, v0.t 993; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t 994; RV32-NEXT: vadd.vv v8, v8, v16, v0.t 995; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 996; RV32-NEXT: vmv.v.x v16, a1 997; RV32-NEXT: li a1, 56 998; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 999; RV32-NEXT: vand.vv v8, v8, v12, v0.t 1000; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1001; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t 1002; RV32-NEXT: ret 1003; 1004; RV64-LABEL: vp_ctpop_v8i64: 1005; RV64: # %bb.0: 1006; RV64-NEXT: lui a1, 349525 1007; RV64-NEXT: lui a2, 209715 1008; RV64-NEXT: lui a3, 61681 1009; RV64-NEXT: lui a4, 4112 1010; RV64-NEXT: addiw a1, a1, 1365 1011; RV64-NEXT: addiw a2, a2, 819 1012; RV64-NEXT: addiw a3, a3, -241 1013; RV64-NEXT: addiw a4, a4, 257 1014; RV64-NEXT: slli a5, a1, 32 1015; RV64-NEXT: add a1, a1, a5 1016; RV64-NEXT: slli a5, a2, 32 1017; RV64-NEXT: add a2, a2, a5 1018; RV64-NEXT: slli a5, a3, 32 1019; RV64-NEXT: add a3, a3, a5 1020; RV64-NEXT: slli a5, a4, 32 1021; RV64-NEXT: add a4, a4, a5 1022; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1023; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t 1024; RV64-NEXT: vand.vx v12, v12, a1, v0.t 1025; RV64-NEXT: vsub.vv v8, v8, v12, v0.t 1026; RV64-NEXT: vand.vx v12, v8, a2, v0.t 1027; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1028; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1029; RV64-NEXT: vadd.vv v8, v12, v8, v0.t 1030; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t 1031; RV64-NEXT: vadd.vv v8, v8, v12, v0.t 1032; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1033; RV64-NEXT: li a0, 56 1034; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1035; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1036; RV64-NEXT: ret 1037 %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl) 1038 ret <8 x i64> %v 1039} 1040 1041define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { 1042; RV32-LABEL: vp_ctpop_v8i64_unmasked: 1043; RV32: # %bb.0: 1044; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1045; RV32-NEXT: vsrl.vi v12, v8, 1 1046; RV32-NEXT: lui a1, 349525 1047; RV32-NEXT: addi a1, a1, 1365 1048; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1049; RV32-NEXT: vmv.v.x v16, a1 1050; RV32-NEXT: lui a1, 209715 1051; RV32-NEXT: addi a1, a1, 819 1052; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1053; RV32-NEXT: vand.vv v12, v12, v16 1054; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1055; RV32-NEXT: vmv.v.x v16, a1 1056; RV32-NEXT: lui a1, 61681 1057; RV32-NEXT: addi a1, a1, -241 1058; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1059; RV32-NEXT: vsub.vv v8, v8, v12 1060; RV32-NEXT: vand.vv v12, v8, v16 1061; RV32-NEXT: vsrl.vi v8, v8, 2 1062; RV32-NEXT: vand.vv v8, v8, v16 1063; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1064; RV32-NEXT: vmv.v.x v16, a1 1065; RV32-NEXT: lui a1, 4112 1066; RV32-NEXT: addi a1, a1, 257 1067; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1068; RV32-NEXT: vadd.vv v8, v12, v8 1069; RV32-NEXT: vsrl.vi v12, v8, 4 1070; RV32-NEXT: 
vadd.vv v8, v8, v12 1071; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma 1072; RV32-NEXT: vmv.v.x v12, a1 1073; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1074; RV32-NEXT: vand.vv v8, v8, v16 1075; RV32-NEXT: vmul.vv v8, v8, v12 1076; RV32-NEXT: li a0, 56 1077; RV32-NEXT: vsrl.vx v8, v8, a0 1078; RV32-NEXT: ret 1079; 1080; RV64-LABEL: vp_ctpop_v8i64_unmasked: 1081; RV64: # %bb.0: 1082; RV64-NEXT: lui a1, 349525 1083; RV64-NEXT: lui a2, 209715 1084; RV64-NEXT: lui a3, 61681 1085; RV64-NEXT: lui a4, 4112 1086; RV64-NEXT: addiw a1, a1, 1365 1087; RV64-NEXT: addiw a2, a2, 819 1088; RV64-NEXT: addiw a3, a3, -241 1089; RV64-NEXT: addiw a4, a4, 257 1090; RV64-NEXT: slli a5, a1, 32 1091; RV64-NEXT: add a1, a1, a5 1092; RV64-NEXT: slli a5, a2, 32 1093; RV64-NEXT: add a2, a2, a5 1094; RV64-NEXT: slli a5, a3, 32 1095; RV64-NEXT: add a3, a3, a5 1096; RV64-NEXT: slli a5, a4, 32 1097; RV64-NEXT: add a4, a4, a5 1098; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma 1099; RV64-NEXT: vsrl.vi v12, v8, 1 1100; RV64-NEXT: vand.vx v12, v12, a1 1101; RV64-NEXT: vsub.vv v8, v8, v12 1102; RV64-NEXT: vand.vx v12, v8, a2 1103; RV64-NEXT: vsrl.vi v8, v8, 2 1104; RV64-NEXT: vand.vx v8, v8, a2 1105; RV64-NEXT: vadd.vv v8, v12, v8 1106; RV64-NEXT: vsrl.vi v12, v8, 4 1107; RV64-NEXT: vadd.vv v8, v8, v12 1108; RV64-NEXT: vand.vx v8, v8, a3 1109; RV64-NEXT: vmul.vx v8, v8, a4 1110; RV64-NEXT: li a0, 56 1111; RV64-NEXT: vsrl.vx v8, v8, a0 1112; RV64-NEXT: ret 1113 %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl) 1114 ret <8 x i64> %v 1115} 1116 1117declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32) 1118 1119define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { 1120; RV32-LABEL: vp_ctpop_v15i64: 1121; RV32: # %bb.0: 1122; RV32-NEXT: addi sp, sp, -48 1123; RV32-NEXT: .cfi_def_cfa_offset 48 1124; RV32-NEXT: csrr a1, vlenb 1125; RV32-NEXT: slli a1, a1, 3 1126; RV32-NEXT: sub sp, sp, a1 1127; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb 1128; RV32-NEXT: addi a1, sp, 48 1129; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1130; RV32-NEXT: lui a1, 349525 1131; RV32-NEXT: addi a1, a1, 1365 1132; RV32-NEXT: sw a1, 40(sp) 1133; RV32-NEXT: sw a1, 44(sp) 1134; RV32-NEXT: lui a1, 209715 1135; RV32-NEXT: addi a1, a1, 819 1136; RV32-NEXT: sw a1, 32(sp) 1137; RV32-NEXT: sw a1, 36(sp) 1138; RV32-NEXT: lui a1, 61681 1139; RV32-NEXT: addi a1, a1, -241 1140; RV32-NEXT: sw a1, 24(sp) 1141; RV32-NEXT: sw a1, 28(sp) 1142; RV32-NEXT: lui a1, 4112 1143; RV32-NEXT: addi a1, a1, 257 1144; RV32-NEXT: sw a1, 16(sp) 1145; RV32-NEXT: sw a1, 20(sp) 1146; RV32-NEXT: addi a1, sp, 40 1147; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1148; RV32-NEXT: vlse64.v v24, (a1), zero 1149; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1150; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t 1151; RV32-NEXT: addi a1, sp, 32 1152; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1153; RV32-NEXT: vlse64.v v16, (a1), zero 1154; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1155; RV32-NEXT: vand.vv v24, v8, v24, v0.t 1156; RV32-NEXT: addi a1, sp, 24 1157; RV32-NEXT: addi a2, sp, 48 1158; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1159; RV32-NEXT: vsub.vv v8, v8, v24, v0.t 1160; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1161; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1162; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1163; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1164; RV32-NEXT: vlse64.v v8, (a1), zero 1165; 
RV32-NEXT: addi a1, sp, 48 1166; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1167; RV32-NEXT: addi a1, sp, 16 1168; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1169; RV32-NEXT: vadd.vv v24, v24, v16, v0.t 1170; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1171; RV32-NEXT: vlse64.v v16, (a1), zero 1172; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1173; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 1174; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 1175; RV32-NEXT: li a0, 56 1176; RV32-NEXT: addi a1, sp, 48 1177; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1178; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1179; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1180; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 1181; RV32-NEXT: csrr a0, vlenb 1182; RV32-NEXT: slli a0, a0, 3 1183; RV32-NEXT: add sp, sp, a0 1184; RV32-NEXT: .cfi_def_cfa sp, 48 1185; RV32-NEXT: addi sp, sp, 48 1186; RV32-NEXT: .cfi_def_cfa_offset 0 1187; RV32-NEXT: ret 1188; 1189; RV64-LABEL: vp_ctpop_v15i64: 1190; RV64: # %bb.0: 1191; RV64-NEXT: lui a1, 349525 1192; RV64-NEXT: lui a2, 209715 1193; RV64-NEXT: lui a3, 61681 1194; RV64-NEXT: lui a4, 4112 1195; RV64-NEXT: addiw a1, a1, 1365 1196; RV64-NEXT: addiw a2, a2, 819 1197; RV64-NEXT: addiw a3, a3, -241 1198; RV64-NEXT: addiw a4, a4, 257 1199; RV64-NEXT: slli a5, a1, 32 1200; RV64-NEXT: add a1, a1, a5 1201; RV64-NEXT: slli a5, a2, 32 1202; RV64-NEXT: add a2, a2, a5 1203; RV64-NEXT: slli a5, a3, 32 1204; RV64-NEXT: add a3, a3, a5 1205; RV64-NEXT: slli a5, a4, 32 1206; RV64-NEXT: add a4, a4, a5 1207; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1208; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1209; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1210; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1211; RV64-NEXT: vand.vx v16, v8, a2, v0.t 1212; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1213; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1214; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1215; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1216; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1217; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1218; RV64-NEXT: li a0, 56 1219; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1220; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1221; RV64-NEXT: ret 1222 %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl) 1223 ret <15 x i64> %v 1224} 1225 1226define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { 1227; RV32-LABEL: vp_ctpop_v15i64_unmasked: 1228; RV32: # %bb.0: 1229; RV32-NEXT: addi sp, sp, -32 1230; RV32-NEXT: .cfi_def_cfa_offset 32 1231; RV32-NEXT: lui a1, 349525 1232; RV32-NEXT: addi a1, a1, 1365 1233; RV32-NEXT: sw a1, 24(sp) 1234; RV32-NEXT: sw a1, 28(sp) 1235; RV32-NEXT: lui a1, 209715 1236; RV32-NEXT: addi a1, a1, 819 1237; RV32-NEXT: sw a1, 16(sp) 1238; RV32-NEXT: sw a1, 20(sp) 1239; RV32-NEXT: lui a1, 61681 1240; RV32-NEXT: addi a1, a1, -241 1241; RV32-NEXT: sw a1, 8(sp) 1242; RV32-NEXT: sw a1, 12(sp) 1243; RV32-NEXT: lui a1, 4112 1244; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1245; RV32-NEXT: vsrl.vi v16, v8, 1 1246; RV32-NEXT: addi a1, a1, 257 1247; RV32-NEXT: sw a1, 0(sp) 1248; RV32-NEXT: sw a1, 4(sp) 1249; RV32-NEXT: addi a1, sp, 24 1250; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1251; RV32-NEXT: vlse64.v v0, (a1), zero 1252; RV32-NEXT: addi a1, sp, 16 1253; RV32-NEXT: vlse64.v v24, (a1), zero 1254; RV32-NEXT: addi a1, sp, 8 1255; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1256; RV32-NEXT: vand.vv v0, v16, v0 1257; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1258; RV32-NEXT: vlse64.v v16, (a1), zero 1259; RV32-NEXT: mv a1, sp 1260; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 
1261; RV32-NEXT: vsub.vv v8, v8, v0 1262; RV32-NEXT: vand.vv v0, v8, v24 1263; RV32-NEXT: vsrl.vi v8, v8, 2 1264; RV32-NEXT: vand.vv v8, v8, v24 1265; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1266; RV32-NEXT: vlse64.v v24, (a1), zero 1267; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1268; RV32-NEXT: vadd.vv v8, v0, v8 1269; RV32-NEXT: vsrl.vi v0, v8, 4 1270; RV32-NEXT: vadd.vv v8, v8, v0 1271; RV32-NEXT: vand.vv v8, v8, v16 1272; RV32-NEXT: vmul.vv v8, v8, v24 1273; RV32-NEXT: li a0, 56 1274; RV32-NEXT: vsrl.vx v8, v8, a0 1275; RV32-NEXT: addi sp, sp, 32 1276; RV32-NEXT: .cfi_def_cfa_offset 0 1277; RV32-NEXT: ret 1278; 1279; RV64-LABEL: vp_ctpop_v15i64_unmasked: 1280; RV64: # %bb.0: 1281; RV64-NEXT: lui a1, 349525 1282; RV64-NEXT: lui a2, 209715 1283; RV64-NEXT: lui a3, 61681 1284; RV64-NEXT: lui a4, 4112 1285; RV64-NEXT: addiw a1, a1, 1365 1286; RV64-NEXT: addiw a2, a2, 819 1287; RV64-NEXT: addiw a3, a3, -241 1288; RV64-NEXT: addiw a4, a4, 257 1289; RV64-NEXT: slli a5, a1, 32 1290; RV64-NEXT: add a1, a1, a5 1291; RV64-NEXT: slli a5, a2, 32 1292; RV64-NEXT: add a2, a2, a5 1293; RV64-NEXT: slli a5, a3, 32 1294; RV64-NEXT: add a3, a3, a5 1295; RV64-NEXT: slli a5, a4, 32 1296; RV64-NEXT: add a4, a4, a5 1297; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1298; RV64-NEXT: vsrl.vi v16, v8, 1 1299; RV64-NEXT: vand.vx v16, v16, a1 1300; RV64-NEXT: vsub.vv v8, v8, v16 1301; RV64-NEXT: vand.vx v16, v8, a2 1302; RV64-NEXT: vsrl.vi v8, v8, 2 1303; RV64-NEXT: vand.vx v8, v8, a2 1304; RV64-NEXT: vadd.vv v8, v16, v8 1305; RV64-NEXT: vsrl.vi v16, v8, 4 1306; RV64-NEXT: vadd.vv v8, v8, v16 1307; RV64-NEXT: vand.vx v8, v8, a3 1308; RV64-NEXT: vmul.vx v8, v8, a4 1309; RV64-NEXT: li a0, 56 1310; RV64-NEXT: vsrl.vx v8, v8, a0 1311; RV64-NEXT: ret 1312 %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl) 1313 ret <15 x i64> %v 1314} 1315 1316declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32) 1317 1318define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { 1319; RV32-LABEL: vp_ctpop_v16i64: 1320; RV32: # %bb.0: 1321; RV32-NEXT: addi sp, sp, -48 1322; RV32-NEXT: .cfi_def_cfa_offset 48 1323; RV32-NEXT: csrr a1, vlenb 1324; RV32-NEXT: slli a1, a1, 3 1325; RV32-NEXT: sub sp, sp, a1 1326; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb 1327; RV32-NEXT: addi a1, sp, 48 1328; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1329; RV32-NEXT: lui a1, 349525 1330; RV32-NEXT: addi a1, a1, 1365 1331; RV32-NEXT: sw a1, 40(sp) 1332; RV32-NEXT: sw a1, 44(sp) 1333; RV32-NEXT: lui a1, 209715 1334; RV32-NEXT: addi a1, a1, 819 1335; RV32-NEXT: sw a1, 32(sp) 1336; RV32-NEXT: sw a1, 36(sp) 1337; RV32-NEXT: lui a1, 61681 1338; RV32-NEXT: addi a1, a1, -241 1339; RV32-NEXT: sw a1, 24(sp) 1340; RV32-NEXT: sw a1, 28(sp) 1341; RV32-NEXT: lui a1, 4112 1342; RV32-NEXT: addi a1, a1, 257 1343; RV32-NEXT: sw a1, 16(sp) 1344; RV32-NEXT: sw a1, 20(sp) 1345; RV32-NEXT: addi a1, sp, 40 1346; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1347; RV32-NEXT: vlse64.v v24, (a1), zero 1348; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1349; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t 1350; RV32-NEXT: addi a1, sp, 32 1351; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1352; RV32-NEXT: vlse64.v v16, (a1), zero 1353; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1354; RV32-NEXT: vand.vv v24, v8, v24, v0.t 1355; RV32-NEXT: addi a1, sp, 24 1356; RV32-NEXT: addi a2, sp, 48 1357; 
RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload 1358; RV32-NEXT: vsub.vv v8, v8, v24, v0.t 1359; RV32-NEXT: vand.vv v24, v8, v16, v0.t 1360; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t 1361; RV32-NEXT: vand.vv v16, v8, v16, v0.t 1362; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1363; RV32-NEXT: vlse64.v v8, (a1), zero 1364; RV32-NEXT: addi a1, sp, 48 1365; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill 1366; RV32-NEXT: addi a1, sp, 16 1367; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1368; RV32-NEXT: vadd.vv v24, v24, v16, v0.t 1369; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1370; RV32-NEXT: vlse64.v v16, (a1), zero 1371; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1372; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t 1373; RV32-NEXT: vadd.vv v8, v24, v8, v0.t 1374; RV32-NEXT: li a0, 56 1375; RV32-NEXT: addi a1, sp, 48 1376; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload 1377; RV32-NEXT: vand.vv v8, v8, v24, v0.t 1378; RV32-NEXT: vmul.vv v8, v8, v16, v0.t 1379; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t 1380; RV32-NEXT: csrr a0, vlenb 1381; RV32-NEXT: slli a0, a0, 3 1382; RV32-NEXT: add sp, sp, a0 1383; RV32-NEXT: .cfi_def_cfa sp, 48 1384; RV32-NEXT: addi sp, sp, 48 1385; RV32-NEXT: .cfi_def_cfa_offset 0 1386; RV32-NEXT: ret 1387; 1388; RV64-LABEL: vp_ctpop_v16i64: 1389; RV64: # %bb.0: 1390; RV64-NEXT: lui a1, 349525 1391; RV64-NEXT: lui a2, 209715 1392; RV64-NEXT: lui a3, 61681 1393; RV64-NEXT: lui a4, 4112 1394; RV64-NEXT: addiw a1, a1, 1365 1395; RV64-NEXT: addiw a2, a2, 819 1396; RV64-NEXT: addiw a3, a3, -241 1397; RV64-NEXT: addiw a4, a4, 257 1398; RV64-NEXT: slli a5, a1, 32 1399; RV64-NEXT: add a1, a1, a5 1400; RV64-NEXT: slli a5, a2, 32 1401; RV64-NEXT: add a2, a2, a5 1402; RV64-NEXT: slli a5, a3, 32 1403; RV64-NEXT: add a3, a3, a5 1404; RV64-NEXT: slli a5, a4, 32 1405; RV64-NEXT: add a4, a4, a5 1406; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1407; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t 1408; RV64-NEXT: vand.vx v16, v16, a1, v0.t 1409; RV64-NEXT: vsub.vv v8, v8, v16, v0.t 1410; RV64-NEXT: vand.vx v16, v8, a2, v0.t 1411; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t 1412; RV64-NEXT: vand.vx v8, v8, a2, v0.t 1413; RV64-NEXT: vadd.vv v8, v16, v8, v0.t 1414; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t 1415; RV64-NEXT: vadd.vv v8, v8, v16, v0.t 1416; RV64-NEXT: vand.vx v8, v8, a3, v0.t 1417; RV64-NEXT: li a0, 56 1418; RV64-NEXT: vmul.vx v8, v8, a4, v0.t 1419; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t 1420; RV64-NEXT: ret 1421 %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl) 1422 ret <16 x i64> %v 1423} 1424 1425define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { 1426; RV32-LABEL: vp_ctpop_v16i64_unmasked: 1427; RV32: # %bb.0: 1428; RV32-NEXT: addi sp, sp, -32 1429; RV32-NEXT: .cfi_def_cfa_offset 32 1430; RV32-NEXT: lui a1, 349525 1431; RV32-NEXT: addi a1, a1, 1365 1432; RV32-NEXT: sw a1, 24(sp) 1433; RV32-NEXT: sw a1, 28(sp) 1434; RV32-NEXT: lui a1, 209715 1435; RV32-NEXT: addi a1, a1, 819 1436; RV32-NEXT: sw a1, 16(sp) 1437; RV32-NEXT: sw a1, 20(sp) 1438; RV32-NEXT: lui a1, 61681 1439; RV32-NEXT: addi a1, a1, -241 1440; RV32-NEXT: sw a1, 8(sp) 1441; RV32-NEXT: sw a1, 12(sp) 1442; RV32-NEXT: lui a1, 4112 1443; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma 1444; RV32-NEXT: vsrl.vi v16, v8, 1 1445; RV32-NEXT: addi a1, a1, 257 1446; RV32-NEXT: sw a1, 0(sp) 1447; RV32-NEXT: sw a1, 4(sp) 1448; RV32-NEXT: addi a1, sp, 24 1449; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma 1450; RV32-NEXT: vlse64.v v0, (a1), zero 1451; RV32-NEXT: addi a1, sp, 16 1452; 
RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v0, v16, v0
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v8, v8, v0
; RV32-NEXT: vand.vv v0, v8, v24
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v8, v0, v8
; RV32-NEXT: vsrl.vi v0, v8, 4
; RV32-NEXT: vadd.vv v8, v8, v0
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vmul.vv v8, v8, v24
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v16i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 349525
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 61681
; RV64-NEXT: lui a4, 4112
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, -241
; RV64-NEXT: addiw a4, a4, 257
; RV64-NEXT: slli a5, a1, 32
; RV64-NEXT: add a1, a1, a5
; RV64-NEXT: slli a5, a2, 32
; RV64-NEXT: add a2, a2, a5
; RV64-NEXT: slli a5, a3, 32
; RV64-NEXT: add a3, a3, a5
; RV64-NEXT: slli a5, a4, 32
; RV64-NEXT: add a4, a4, a5
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1
; RV64-NEXT: vand.vx v16, v16, a1
; RV64-NEXT: vsub.vv v8, v8, v16
; RV64-NEXT: vand.vx v16, v8, a2
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a2
; RV64-NEXT: vadd.vv v8, v16, v8
; RV64-NEXT: vsrl.vi v16, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v16
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vmul.vx v8, v8, a4
; RV64-NEXT: li a0, 56
; RV64-NEXT: vsrl.vx v8, v8, a0
; RV64-NEXT: ret
  %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)

define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 48
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT: vslidedown.vi v7, v0, 2
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: addi a1, a2, 257
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a3, .LBB34_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB34_2:
; RV32-NEXT: addi a2, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 40
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 5
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vadd.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: addi a3, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v8, (a3), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: li a3, 24
; RV32-NEXT: mul a2, a2, a3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v16, v8, v0.t
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: vmv1r.v v0, v7
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 40
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v24, v8, v0.t
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a0, a0, a2
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 48
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v32i64:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 4
; RV64-NEXT: sub sp, sp, a1
; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 16
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT: vslidedown.vi v24, v0, 2
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB34_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB34_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: lui a1, 349525
; RV64-NEXT: lui a2, 209715
; RV64-NEXT: lui a3, 61681
; RV64-NEXT: lui a4, 4112
; RV64-NEXT: addiw a1, a1, 1365
; RV64-NEXT: addiw a2, a2, 819
; RV64-NEXT: addiw a3, a3, -241
; RV64-NEXT: addiw a4, a4, 257
; RV64-NEXT: slli a5, a1, 32
; RV64-NEXT: add a5, a1, a5
; RV64-NEXT: slli a1, a2, 32
; RV64-NEXT: add a6, a2, a1
; RV64-NEXT: slli a1, a3, 32
; RV64-NEXT: add a1, a3, a1
; RV64-NEXT: slli a2, a4, 32
; RV64-NEXT: add a2, a4, a2
; RV64-NEXT: addi a3, a0, -16
; RV64-NEXT: sltu a0, a0, a3
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a3
; RV64-NEXT: li a3, 56
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v16, v8, a6, v0.t
; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
; RV64-NEXT: vand.vx v8, v8, a6, v0.t
; RV64-NEXT: vadd.vv v8, v16, v8, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a1, v0.t
; RV64-NEXT: vmul.vx v8, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v8, v8, a3, v0.t
; RV64-NEXT: addi a4, sp, 16
; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: csrr a4, vlenb
; RV64-NEXT: slli a4, a4, 3
; RV64-NEXT: add a4, sp, a4
; RV64-NEXT: addi a4, a4, 16
; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV64-NEXT: vand.vx v16, v16, a5, v0.t
; RV64-NEXT: vsub.vv v16, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v16, a6, v0.t
; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t
; RV64-NEXT: vand.vx v16, v16, a6, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV64-NEXT: vadd.vv v8, v8, v16, v0.t
; RV64-NEXT: vand.vx v8, v8, a1, v0.t
; RV64-NEXT: vmul.vx v8, v8, a2, v0.t
; RV64-NEXT: vsrl.vx v16, v8, a3, v0.t
; RV64-NEXT: addi a0, sp, 16
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: .cfi_def_cfa sp, 16
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctpop_v32i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT: vmv8r.v v24, v16
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: lui a2, 209715
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a2, a2, 819
; RV32-NEXT: sw a2, 32(sp)
; RV32-NEXT: sw a2, 36(sp)
; RV32-NEXT: lui a2, 4112
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: li a3, 16
; RV32-NEXT: addi a1, a2, 257
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a3, .LBB35_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB35_2:
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v8, 1
; RV32-NEXT: addi a2, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a2), zero
; RV32-NEXT: addi a2, a0, -16
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: addi a2, sp, 32
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v0, v0, v16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vmv8r.v v8, v24
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v24, v24, 1
; RV32-NEXT: vand.vv v16, v24, v16
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v16, v24, v16
; RV32-NEXT: addi a2, sp, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsub.vv v8, v8, v24
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v16, v0
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 3
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v16, v16, 2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v8, v0
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v8, v8, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v0, v8, v0
; RV32-NEXT: addi a2, sp, 24
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v16, v8, v16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v8, (a2), zero
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vadd.vv v24, v24, v0
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v0, v16, 4
; RV32-NEXT: vadd.vv v16, v16, v0
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 48
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v0, (a2), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v24, 4
; RV32-NEXT: vadd.vv v16, v24, v16
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
; RV32-NEXT: add a2, sp, a2
; RV32-NEXT: addi a2, a2, 48
; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vand.vv v24, v24, v8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v16, v8
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v16, v24, v0
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vmul.vv v24, v8, v0
; RV32-NEXT: li a2, 56
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v8, v16, a2
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vx v16, v24, a2
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: .cfi_def_cfa sp, 48
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctpop_v32i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: li a2, 16
; RV64-NEXT: mv a1, a0
; RV64-NEXT: bltu a0, a2, .LBB35_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB35_2:
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: lui a2, 349525
; RV64-NEXT: lui a3, 209715
; RV64-NEXT: lui a4, 61681
; RV64-NEXT: lui a5, 4112
; RV64-NEXT: addiw a2, a2, 1365
; RV64-NEXT: addiw a3, a3, 819
; RV64-NEXT: addiw a4, a4, -241
; RV64-NEXT: addiw a5, a5, 257
; RV64-NEXT: slli a6, a2, 32
; RV64-NEXT: add a2, a2, a6
; RV64-NEXT: slli a6, a3, 32
; RV64-NEXT: add a3, a3, a6
; RV64-NEXT: slli a6, a4, 32
; RV64-NEXT: add a4, a4, a6
; RV64-NEXT: slli a6, a5, 32
; RV64-NEXT: add a5, a5, a6
; RV64-NEXT: addi a6, a0, -16
; RV64-NEXT: sltu a0, a0, a6
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: and a0, a0, a6
; RV64-NEXT: li a6, 56
; RV64-NEXT: vand.vx v24, v24, a2
; RV64-NEXT: vsub.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 1
; RV64-NEXT: vand.vx v24, v24, a2
; RV64-NEXT: vsub.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v8, a3
; RV64-NEXT: vsrl.vi v8, v8, 2
; RV64-NEXT: vand.vx v8, v8, a3
; RV64-NEXT: vadd.vv v8, v24, v8
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v24, v16, a3
; RV64-NEXT: vsrl.vi v16, v16, 2
; RV64-NEXT: vand.vx v16, v16, a3
; RV64-NEXT: vadd.vv v16, v24, v16
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 4
; RV64-NEXT: vadd.vv v8, v8, v24
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v16, 4
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vand.vx v8, v8, a4
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vadd.vv v16, v16, v24
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a5
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vand.vx v16, v16, a4
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vx v8, v8, a6
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vmul.vx v16, v16, a5
; RV64-NEXT: vsrl.vx v16, v16, a6
; RV64-NEXT: ret
  %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}