; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=CHECKO0
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=CHECK,DOT
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISEL
; RUN: llc < %s -O0 -global-isel -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefix=GISELO0
; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon | FileCheck %s --check-prefixes=GISEL,NEON-GISEL
; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+neon,+dotprod | FileCheck %s --check-prefixes=GISEL,DOT-GISEL
; RUN: llc < %s -global-isel -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=GISEL,SVE-GISEL

; Code generation test for the llvm.ctpop intrinsic on aarch64-unknown-unknown.
; Every function below is compiled by each llc invocation above: with and
; without -global-isel, at the default optimization level and at -O0, and with
; the +neon, +neon,+dotprod and +sve attribute sets.
;
; The assertion lines inside the functions are generated output; regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
;
; NOTE(review): the plain -global-isel invocation uses only the GISEL prefix.
; popcount2x64, popcount4x32 and popcount2x32 have no assertions under that
; prefix (only under the three feature specific GlobalISel prefixes), so that
; invocation appears to leave those three functions unchecked -- confirm this
; is intended.
;
; NOTE(review): the two -O0 prefixes carry textually identical assertions for
; every function in this file; presumably both invocations select through the
; same path -- confirm.

; Scalar i128 case: loads an i128 through %0 (align 16), applies
; llvm.ctpop.i128 and returns the count truncated to i8.
; Function Attrs: nobuiltin nounwind readonly
define i8 @popcount128(ptr nocapture nonnull readonly %0) {
; CHECKO0-LABEL: popcount128:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    ldr q0, [x0]
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlv h0, v0.16b
; CHECKO0-NEXT:    // kill: def $q0 killed $h0
; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECKO0-NEXT:    fmov w0, s0
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount128:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1 { v0.d }[1], [x8]
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount128:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    ldr q0, [x0]
; GISEL-NEXT:    cnt v0.16b, v0.16b
; GISEL-NEXT:    uaddlv h0, v0.16b
; GISEL-NEXT:    fmov w0, s0
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount128:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    ldr q0, [x0]
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlv h0, v0.16b
; GISELO0-NEXT:    // kill: def $q0 killed $h0
; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; GISELO0-NEXT:    fmov w0, s0
; GISELO0-NEXT:    ret
Entry:
  %1 = load i128, ptr %0, align 16
  %2 = tail call i128 @llvm.ctpop.i128(i128 %1)
  %3 = trunc i128 %2 to i8
  ret i8 %3
}

; Function Attrs: nounwind readnone speculatable willreturn
declare i128 @llvm.ctpop.i128(i128)

; Scalar i256 case: loads an i256 through %0 (align 16), applies
; llvm.ctpop.i256 and returns the count truncated to i16. The expected output
; counts the two 128-bit halves separately and adds the partial results.
; Function Attrs: nobuiltin nounwind readonly
define i16 @popcount256(ptr nocapture nonnull readonly %0) {
; CHECKO0-LABEL: popcount256:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    ldr x11, [x0]
; CHECKO0-NEXT:    ldr x10, [x0, #8]
; CHECKO0-NEXT:    ldr x9, [x0, #16]
; CHECKO0-NEXT:    ldr x8, [x0, #24]
; CHECKO0-NEXT:    // implicit-def: $q1
; CHECKO0-NEXT:    mov v1.d[0], x11
; CHECKO0-NEXT:    mov v1.d[1], x10
; CHECKO0-NEXT:    // implicit-def: $q0
; CHECKO0-NEXT:    mov v0.d[0], x9
; CHECKO0-NEXT:    mov v0.d[1], x8
; CHECKO0-NEXT:    cnt v1.16b, v1.16b
; CHECKO0-NEXT:    uaddlv h1, v1.16b
; CHECKO0-NEXT:    // kill: def $q1 killed $h1
; CHECKO0-NEXT:    // kill: def $s1 killed $s1 killed $q1
; CHECKO0-NEXT:    fmov w0, s1
; CHECKO0-NEXT:    mov w10, wzr
; CHECKO0-NEXT:    mov w9, w0
; CHECKO0-NEXT:    mov w8, w10
; CHECKO0-NEXT:    bfi x9, x8, #32, #32
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlv h0, v0.16b
; CHECKO0-NEXT:    // kill: def $q0 killed $h0
; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECKO0-NEXT:    fmov w0, s0
; CHECKO0-NEXT:    mov w8, w0
; CHECKO0-NEXT:    // kill: def $x10 killed $w10
; CHECKO0-NEXT:    bfi x8, x10, #32, #32
; CHECKO0-NEXT:    adds x8, x8, x9
; CHECKO0-NEXT:    mov w0, w8
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount256:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    ldr d0, [x0, #16]
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    add x9, x0, #24
; CHECK-NEXT:    ld1 { v0.d }[1], [x9]
; CHECK-NEXT:    ld1 { v1.d }[1], [x8]
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    cnt v1.16b, v1.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    addv b1, v1.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount256:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    ldp x8, x9, [x0]
; GISEL-NEXT:    mov v0.d[0], x8
; GISEL-NEXT:    ldp x8, x10, [x0, #16]
; GISEL-NEXT:    mov v1.d[0], x8
; GISEL-NEXT:    mov v0.d[1], x9
; GISEL-NEXT:    mov v1.d[1], x10
; GISEL-NEXT:    cnt v0.16b, v0.16b
; GISEL-NEXT:    cnt v1.16b, v1.16b
; GISEL-NEXT:    uaddlv h0, v0.16b
; GISEL-NEXT:    uaddlv h1, v1.16b
; GISEL-NEXT:    mov w8, v0.s[0]
; GISEL-NEXT:    fmov w9, s1
; GISEL-NEXT:    add x0, x8, w9, uxtw
; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount256:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    ldr x11, [x0]
; GISELO0-NEXT:    ldr x10, [x0, #8]
; GISELO0-NEXT:    ldr x9, [x0, #16]
; GISELO0-NEXT:    ldr x8, [x0, #24]
; GISELO0-NEXT:    // implicit-def: $q1
; GISELO0-NEXT:    mov v1.d[0], x11
; GISELO0-NEXT:    mov v1.d[1], x10
; GISELO0-NEXT:    // implicit-def: $q0
; GISELO0-NEXT:    mov v0.d[0], x9
; GISELO0-NEXT:    mov v0.d[1], x8
; GISELO0-NEXT:    cnt v1.16b, v1.16b
; GISELO0-NEXT:    uaddlv h1, v1.16b
; GISELO0-NEXT:    // kill: def $q1 killed $h1
; GISELO0-NEXT:    // kill: def $s1 killed $s1 killed $q1
; GISELO0-NEXT:    fmov w0, s1
; GISELO0-NEXT:    mov w10, wzr
; GISELO0-NEXT:    mov w9, w0
; GISELO0-NEXT:    mov w8, w10
; GISELO0-NEXT:    bfi x9, x8, #32, #32
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlv h0, v0.16b
; GISELO0-NEXT:    // kill: def $q0 killed $h0
; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; GISELO0-NEXT:    fmov w0, s0
; GISELO0-NEXT:    mov w8, w0
; GISELO0-NEXT:    // kill: def $x10 killed $w10
; GISELO0-NEXT:    bfi x8, x10, #32, #32
; GISELO0-NEXT:    adds x8, x8, x9
; GISELO0-NEXT:    mov w0, w8
; GISELO0-NEXT:    ret
Entry:
  %1 = load i256, ptr %0, align 16
  %2 = tail call i256 @llvm.ctpop.i256(i256 %1)
  %3 = trunc i256 %2 to i16
  ret i16 %3
}

; Function Attrs: nounwind readnone speculatable willreturn
declare i256 @llvm.ctpop.i256(i256)

; Single-element vector case: applies llvm.ctpop.v1i128 to the <1 x i128>
; argument and returns the result without truncation. In the expected output
; the argument arrives in x0/x1 and the result is returned in x0/x1.
define <1 x i128> @popcount1x128(<1 x i128> %0) {
; CHECKO0-LABEL: popcount1x128:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    // implicit-def: $q0
; CHECKO0-NEXT:    mov v0.d[0], x0
; CHECKO0-NEXT:    mov v0.d[1], x1
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlv h0, v0.16b
; CHECKO0-NEXT:    // kill: def $q0 killed $h0
; CHECKO0-NEXT:    mov x1, xzr
; CHECKO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECKO0-NEXT:    fmov w0, s0
; CHECKO0-NEXT:    mov w8, wzr
; CHECKO0-NEXT:    // kill: def $x0 killed $w0
; CHECKO0-NEXT:    // kill: def $x8 killed $w8
; CHECKO0-NEXT:    bfi x0, x8, #32, #32
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount1x128:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    mov v0.d[1], x1
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    mov x1, v0.d[1]
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount1x128:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    mov v0.d[0], x0
; GISEL-NEXT:    mov v0.d[1], x1
; GISEL-NEXT:    mov x1, xzr
; GISEL-NEXT:    cnt v0.16b, v0.16b
; GISEL-NEXT:    uaddlv h0, v0.16b
; GISEL-NEXT:    mov w0, v0.s[0]
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount1x128:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    // implicit-def: $q0
; GISELO0-NEXT:    mov v0.d[0], x0
; GISELO0-NEXT:    mov v0.d[1], x1
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlv h0, v0.16b
; GISELO0-NEXT:    // kill: def $q0 killed $h0
; GISELO0-NEXT:    mov x1, xzr
; GISELO0-NEXT:    // kill: def $s0 killed $s0 killed $q0
; GISELO0-NEXT:    fmov w0, s0
; GISELO0-NEXT:    mov w8, wzr
; GISELO0-NEXT:    // kill: def $x0 killed $w0
; GISELO0-NEXT:    // kill: def $x8 killed $w8
; GISELO0-NEXT:    bfi x0, x8, #32, #32
; GISELO0-NEXT:    ret
Entry:
  %1 = tail call <1 x i128> @llvm.ctpop.v1i128(<1 x i128> %0)
  ret <1 x i128> %1
}

declare <1 x i128> @llvm.ctpop.v1i128(<1 x i128>)

; <2 x i64> case: applies llvm.ctpop.v2i64 to the argument. The assertions are
; split per feature set here: the two +dotprod configurations expect udot
; against a vector of byte 1s (movi #1) followed by a single uaddlp, while the
; other configurations expect a chain of three uaddlp instructions after cnt.
define <2 x i64> @popcount2x64(<2 x i64> %0) {
; CHECKO0-LABEL: popcount2x64:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
; CHECKO0-NEXT:    uaddlp v0.2d, v0.4s
; CHECKO0-NEXT:    ret
;
; NEON-LABEL: popcount2x64:
; NEON:       // %bb.0: // %Entry
; NEON-NEXT:    cnt v0.16b, v0.16b
; NEON-NEXT:    uaddlp v0.8h, v0.16b
; NEON-NEXT:    uaddlp v0.4s, v0.8h
; NEON-NEXT:    uaddlp v0.2d, v0.4s
; NEON-NEXT:    ret
;
; DOT-LABEL: popcount2x64:
; DOT:       // %bb.0: // %Entry
; DOT-NEXT:    movi v1.16b, #1
; DOT-NEXT:    cnt v0.16b, v0.16b
; DOT-NEXT:    movi v2.2d, #0000000000000000
; DOT-NEXT:    udot v2.4s, v1.16b, v0.16b
; DOT-NEXT:    uaddlp v0.2d, v2.4s
; DOT-NEXT:    ret
;
; SVE-LABEL: popcount2x64:
; SVE:       // %bb.0: // %Entry
; SVE-NEXT:    cnt v0.16b, v0.16b
; SVE-NEXT:    uaddlp v0.8h, v0.16b
; SVE-NEXT:    uaddlp v0.4s, v0.8h
; SVE-NEXT:    uaddlp v0.2d, v0.4s
; SVE-NEXT:    ret
;
; GISELO0-LABEL: popcount2x64:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
; GISELO0-NEXT:    uaddlp v0.4s, v0.8h
; GISELO0-NEXT:    uaddlp v0.2d, v0.4s
; GISELO0-NEXT:    ret
;
; NEON-GISEL-LABEL: popcount2x64:
; NEON-GISEL:       // %bb.0: // %Entry
; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
; NEON-GISEL-NEXT:    uaddlp v0.2d, v0.4s
; NEON-GISEL-NEXT:    ret
;
; DOT-GISEL-LABEL: popcount2x64:
; DOT-GISEL:       // %bb.0: // %Entry
; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
; DOT-GISEL-NEXT:    movi v2.16b, #1
; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
; DOT-GISEL-NEXT:    uaddlp v0.2d, v1.4s
; DOT-GISEL-NEXT:    ret
;
; SVE-GISEL-LABEL: popcount2x64:
; SVE-GISEL:       // %bb.0: // %Entry
; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
; SVE-GISEL-NEXT:    uaddlp v0.2d, v0.4s
; SVE-GISEL-NEXT:    ret
Entry:
  %1 = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %0)
  ret <2 x i64> %1
}

declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

; <1 x i64> case: applies llvm.ctpop.v1i64 to the argument. The non-GlobalISel
; optimized output widens with a uaddlp chain; the other configurations reduce
; with uaddlv and move the result back into d0.
define <1 x i64> @popcount1x64(<1 x i64> %0) {
; CHECKO0-LABEL: popcount1x64:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    fmov x0, d0
; CHECKO0-NEXT:    fmov d0, x0
; CHECKO0-NEXT:    cnt v0.8b, v0.8b
; CHECKO0-NEXT:    uaddlv h0, v0.8b
; CHECKO0-NEXT:    // kill: def $q0 killed $h0
; CHECKO0-NEXT:    mov w8, v0.s[0]
; CHECKO0-NEXT:    // kill: def $x8 killed $w8
; CHECKO0-NEXT:    fmov d0, x8
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount1x64:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    cnt v0.8b, v0.8b
; CHECK-NEXT:    uaddlp v0.4h, v0.8b
; CHECK-NEXT:    uaddlp v0.2s, v0.4h
; CHECK-NEXT:    uaddlp v0.1d, v0.2s
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount1x64:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    cnt v0.8b, v0.8b
; GISEL-NEXT:    uaddlv h0, v0.8b
; GISEL-NEXT:    mov w8, v0.s[0]
; GISEL-NEXT:    fmov d0, x8
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount1x64:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    fmov x0, d0
; GISELO0-NEXT:    fmov d0, x0
; GISELO0-NEXT:    cnt v0.8b, v0.8b
; GISELO0-NEXT:    uaddlv h0, v0.8b
; GISELO0-NEXT:    // kill: def $q0 killed $h0
; GISELO0-NEXT:    mov w8, v0.s[0]
; GISELO0-NEXT:    // kill: def $x8 killed $w8
; GISELO0-NEXT:    fmov d0, x8
; GISELO0-NEXT:    ret
Entry:
  %1 = tail call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %0)
  ret <1 x i64> %1
}

declare <1 x i64> @llvm.ctpop.v1i64(<1 x i64>)

; <4 x i32> case: applies llvm.ctpop.v4i32 to the argument. Assertions are
; split per feature set: the +dotprod configurations expect cnt followed by
; udot against a vector of byte 1s, the others expect cnt plus two uaddlp.
define <4 x i32> @popcount4x32(<4 x i32> %0) {
; CHECKO0-LABEL: popcount4x32:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
; CHECKO0-NEXT:    uaddlp v0.4s, v0.8h
; CHECKO0-NEXT:    ret
;
; NEON-LABEL: popcount4x32:
; NEON:       // %bb.0: // %Entry
; NEON-NEXT:    cnt v0.16b, v0.16b
; NEON-NEXT:    uaddlp v0.8h, v0.16b
; NEON-NEXT:    uaddlp v0.4s, v0.8h
; NEON-NEXT:    ret
;
; DOT-LABEL: popcount4x32:
; DOT:       // %bb.0: // %Entry
; DOT-NEXT:    movi v1.16b, #1
; DOT-NEXT:    cnt v2.16b, v0.16b
; DOT-NEXT:    movi v0.2d, #0000000000000000
; DOT-NEXT:    udot v0.4s, v1.16b, v2.16b
; DOT-NEXT:    ret
;
; SVE-LABEL: popcount4x32:
; SVE:       // %bb.0: // %Entry
; SVE-NEXT:    cnt v0.16b, v0.16b
; SVE-NEXT:    uaddlp v0.8h, v0.16b
; SVE-NEXT:    uaddlp v0.4s, v0.8h
; SVE-NEXT:    ret
;
; GISELO0-LABEL: popcount4x32:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
; GISELO0-NEXT:    uaddlp v0.4s, v0.8h
; GISELO0-NEXT:    ret
;
; NEON-GISEL-LABEL: popcount4x32:
; NEON-GISEL:       // %bb.0: // %Entry
; NEON-GISEL-NEXT:    cnt v0.16b, v0.16b
; NEON-GISEL-NEXT:    uaddlp v0.8h, v0.16b
; NEON-GISEL-NEXT:    uaddlp v0.4s, v0.8h
; NEON-GISEL-NEXT:    ret
;
; DOT-GISEL-LABEL: popcount4x32:
; DOT-GISEL:       // %bb.0: // %Entry
; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
; DOT-GISEL-NEXT:    cnt v0.16b, v0.16b
; DOT-GISEL-NEXT:    movi v2.16b, #1
; DOT-GISEL-NEXT:    udot v1.4s, v2.16b, v0.16b
; DOT-GISEL-NEXT:    mov v0.16b, v1.16b
; DOT-GISEL-NEXT:    ret
;
; SVE-GISEL-LABEL: popcount4x32:
; SVE-GISEL:       // %bb.0: // %Entry
; SVE-GISEL-NEXT:    cnt v0.16b, v0.16b
; SVE-GISEL-NEXT:    uaddlp v0.8h, v0.16b
; SVE-GISEL-NEXT:    uaddlp v0.4s, v0.8h
; SVE-GISEL-NEXT:    ret
Entry:
  %1 = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %0)
  ret <4 x i32> %1
}

declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)

; <2 x i32> case: applies llvm.ctpop.v2i32 to the argument. This is the 64-bit
; vector counterpart of popcount4x32 (8b/4h/2s arrangements in the expected
; output); the +dotprod configurations again expect udot instead of uaddlp.
define <2 x i32> @popcount2x32(<2 x i32> %0) {
; CHECKO0-LABEL: popcount2x32:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    cnt v0.8b, v0.8b
; CHECKO0-NEXT:    uaddlp v0.4h, v0.8b
; CHECKO0-NEXT:    uaddlp v0.2s, v0.4h
; CHECKO0-NEXT:    ret
;
; NEON-LABEL: popcount2x32:
; NEON:       // %bb.0: // %Entry
; NEON-NEXT:    cnt v0.8b, v0.8b
; NEON-NEXT:    uaddlp v0.4h, v0.8b
; NEON-NEXT:    uaddlp v0.2s, v0.4h
; NEON-NEXT:    ret
;
; DOT-LABEL: popcount2x32:
; DOT:       // %bb.0: // %Entry
; DOT-NEXT:    movi v1.2d, #0000000000000000
; DOT-NEXT:    cnt v0.8b, v0.8b
; DOT-NEXT:    movi v2.8b, #1
; DOT-NEXT:    udot v1.2s, v2.8b, v0.8b
; DOT-NEXT:    fmov d0, d1
; DOT-NEXT:    ret
;
; SVE-LABEL: popcount2x32:
; SVE:       // %bb.0: // %Entry
; SVE-NEXT:    cnt v0.8b, v0.8b
; SVE-NEXT:    uaddlp v0.4h, v0.8b
; SVE-NEXT:    uaddlp v0.2s, v0.4h
; SVE-NEXT:    ret
;
; GISELO0-LABEL: popcount2x32:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    cnt v0.8b, v0.8b
; GISELO0-NEXT:    uaddlp v0.4h, v0.8b
; GISELO0-NEXT:    uaddlp v0.2s, v0.4h
; GISELO0-NEXT:    ret
;
; NEON-GISEL-LABEL: popcount2x32:
; NEON-GISEL:       // %bb.0: // %Entry
; NEON-GISEL-NEXT:    cnt v0.8b, v0.8b
; NEON-GISEL-NEXT:    uaddlp v0.4h, v0.8b
; NEON-GISEL-NEXT:    uaddlp v0.2s, v0.4h
; NEON-GISEL-NEXT:    ret
;
; DOT-GISEL-LABEL: popcount2x32:
; DOT-GISEL:       // %bb.0: // %Entry
; DOT-GISEL-NEXT:    movi v1.2d, #0000000000000000
; DOT-GISEL-NEXT:    cnt v0.8b, v0.8b
; DOT-GISEL-NEXT:    movi v2.8b, #1
; DOT-GISEL-NEXT:    udot v1.2s, v2.8b, v0.8b
; DOT-GISEL-NEXT:    fmov d0, d1
; DOT-GISEL-NEXT:    ret
;
; SVE-GISEL-LABEL: popcount2x32:
; SVE-GISEL:       // %bb.0: // %Entry
; SVE-GISEL-NEXT:    cnt v0.8b, v0.8b
; SVE-GISEL-NEXT:    uaddlp v0.4h, v0.8b
; SVE-GISEL-NEXT:    uaddlp v0.2s, v0.4h
; SVE-GISEL-NEXT:    ret
Entry:
  %1 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %0)
  ret <2 x i32> %1
}

declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)

; <8 x i16> case: applies llvm.ctpop.v8i16 to the argument. All four prefixes
; used for this function expect the same cnt + single uaddlp sequence.
define <8 x i16> @popcount8x16(<8 x i16> %0) {
; CHECKO0-LABEL: popcount8x16:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    cnt v0.16b, v0.16b
; CHECKO0-NEXT:    uaddlp v0.8h, v0.16b
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount8x16:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    uaddlp v0.8h, v0.16b
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount8x16:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    cnt v0.16b, v0.16b
; GISEL-NEXT:    uaddlp v0.8h, v0.16b
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount8x16:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    cnt v0.16b, v0.16b
; GISELO0-NEXT:    uaddlp v0.8h, v0.16b
; GISELO0-NEXT:    ret
Entry:
  %1 = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %0)
  ret <8 x i16> %1
}

declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)

; <4 x i16> case: applies llvm.ctpop.v4i16 to the argument. This is the 64-bit
; vector counterpart of popcount8x16; all four prefixes expect cnt + uaddlp.
define <4 x i16> @popcount4x16(<4 x i16> %0) {
; CHECKO0-LABEL: popcount4x16:
; CHECKO0:       // %bb.0: // %Entry
; CHECKO0-NEXT:    cnt v0.8b, v0.8b
; CHECKO0-NEXT:    uaddlp v0.4h, v0.8b
; CHECKO0-NEXT:    ret
;
; CHECK-LABEL: popcount4x16:
; CHECK:       // %bb.0: // %Entry
; CHECK-NEXT:    cnt v0.8b, v0.8b
; CHECK-NEXT:    uaddlp v0.4h, v0.8b
; CHECK-NEXT:    ret
;
; GISEL-LABEL: popcount4x16:
; GISEL:       // %bb.0: // %Entry
; GISEL-NEXT:    cnt v0.8b, v0.8b
; GISEL-NEXT:    uaddlp v0.4h, v0.8b
; GISEL-NEXT:    ret
;
; GISELO0-LABEL: popcount4x16:
; GISELO0:       // %bb.0: // %Entry
; GISELO0-NEXT:    cnt v0.8b, v0.8b
; GISELO0-NEXT:    uaddlp v0.4h, v0.8b
; GISELO0-NEXT:    ret
Entry:
  %1 = tail call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %0)
  ret <4 x i16> %1
}

declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>)