; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -mtriple=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
; RUN: llc < %s -mtriple=aarch64 -mattr +cssc -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-CSSC %s

; Overview: every function below calls llvm.ctpop.i32 or llvm.ctpop.i64 and is
; compiled three times by the command lines above: once for the default
; arm64-eabi target, once for aarch64 with NEON disabled, and once for aarch64
; with +cssc. Each function therefore carries three groups of expectations, one
; per configuration, in that order.

; i32 population count of a scalar argument.
;  * default run: the value is moved into a vector register, counted per byte
;    with cnt.8b, summed with addv.8b and moved back.
;  * no-NEON run: a scalar shift/mask/add/multiply sequence is expected.
;  * +cssc run: a single scalar `cnt w0, w0` is expected.
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-LABEL: cnt32_advsimd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32_advsimd:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w9, w0, #1
; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
; CHECK-NONEON-NEXT:    sub w9, w0, w9
; CHECK-NONEON-NEXT:    lsr w10, w9, #2
; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
; CHECK-NONEON-NEXT:    add w9, w9, w10
; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mul w8, w9, w8
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt32_advsimd:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt w0, w0
; CHECK-CSSC-NEXT:    ret
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; i32 population count of element 0 extracted from a <2 x i32> argument.
;  * default run: the expectations record an fmov to w8 followed by an fmov
;    back to s0 before the cnt.8b/addv.8b sequence.
;  * no-NEON run: the same scalar sequence as cnt32_advsimd, reading w0.
;  * +cssc run: the element is moved to w8 and counted with scalar `cnt`.
define i32 @cnt32_advsimd_2(<2 x i32> %x) {
; CHECK-LABEL: cnt32_advsimd_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32_advsimd_2:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w9, w0, #1
; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
; CHECK-NONEON-NEXT:    sub w9, w0, w9
; CHECK-NONEON-NEXT:    lsr w10, w9, #2
; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
; CHECK-NONEON-NEXT:    add w9, w9, w10
; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mul w8, w9, w8
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt32_advsimd_2:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-CSSC-NEXT:    fmov w8, s0
; CHECK-CSSC-NEXT:    cnt w0, w8
; CHECK-CSSC-NEXT:    ret
  %1 = extractelement <2 x i32> %x, i64 0
  %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
  ret i32 %2
}

; i64 population count of a scalar argument; the 64-bit counterpart of
; cnt32_advsimd. The no-NEON expectations use the 64-bit masks and a final
; shift by 56 instead of 24.
define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
; CHECK-LABEL: cnt64_advsimd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    cnt.8b v0, v0
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt64_advsimd:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x9, x0, #1
; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x9, x0, x9
; CHECK-NONEON-NEXT:    lsr x10, x9, #2
; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x9, x9, x10
; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mul x8, x9, x8
; CHECK-NONEON-NEXT:    lsr x0, x8, #56
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt64_advsimd:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt x0, x0
; CHECK-CSSC-NEXT:    ret
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; Do not use AdvSIMD when -mno-implicit-float is specified.
; rdar://9473858

; i32 population count in a function marked noimplicitfloat. The default run
; is expected to produce the same scalar sequence as the no-NEON run (no vector
; instructions), while the +cssc run still expects the scalar `cnt`.
define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr w9, w0, #1
; CHECK-NEXT:    mov w8, #16843009 // =0x1010101
; CHECK-NEXT:    and w9, w9, #0x55555555
; CHECK-NEXT:    sub w9, w0, w9
; CHECK-NEXT:    lsr w10, w9, #2
; CHECK-NEXT:    and w9, w9, #0x33333333
; CHECK-NEXT:    and w10, w10, #0x33333333
; CHECK-NEXT:    add w9, w9, w10
; CHECK-NEXT:    add w9, w9, w9, lsr #4
; CHECK-NEXT:    and w9, w9, #0xf0f0f0f
; CHECK-NEXT:    mul w8, w9, w8
; CHECK-NEXT:    lsr w0, w8, #24
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt32:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr w9, w0, #1
; CHECK-NONEON-NEXT:    mov w8, #16843009 // =0x1010101
; CHECK-NONEON-NEXT:    and w9, w9, #0x55555555
; CHECK-NONEON-NEXT:    sub w9, w0, w9
; CHECK-NONEON-NEXT:    lsr w10, w9, #2
; CHECK-NONEON-NEXT:    and w9, w9, #0x33333333
; CHECK-NONEON-NEXT:    and w10, w10, #0x33333333
; CHECK-NONEON-NEXT:    add w9, w9, w10
; CHECK-NONEON-NEXT:    add w9, w9, w9, lsr #4
; CHECK-NONEON-NEXT:    and w9, w9, #0xf0f0f0f
; CHECK-NONEON-NEXT:    mul w8, w9, w8
; CHECK-NONEON-NEXT:    lsr w0, w8, #24
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt32:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt w0, w0
; CHECK-CSSC-NEXT:    ret
  %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
  ret i32 %cnt
}

; i64 counterpart of cnt32: noimplicitfloat, so the default run is expected to
; use the 64-bit scalar sequence; the +cssc run expects `cnt x0, x0`.
define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat {
; CHECK-LABEL: cnt64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr x9, x0, #1
; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NEXT:    and x9, x9, #0x5555555555555555
; CHECK-NEXT:    sub x9, x0, x9
; CHECK-NEXT:    lsr x10, x9, #2
; CHECK-NEXT:    and x9, x9, #0x3333333333333333
; CHECK-NEXT:    and x10, x10, #0x3333333333333333
; CHECK-NEXT:    add x9, x9, x10
; CHECK-NEXT:    add x9, x9, x9, lsr #4
; CHECK-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    lsr x0, x8, #56
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: cnt64:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    lsr x9, x0, #1
; CHECK-NONEON-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
; CHECK-NONEON-NEXT:    and x9, x9, #0x5555555555555555
; CHECK-NONEON-NEXT:    sub x9, x0, x9
; CHECK-NONEON-NEXT:    lsr x10, x9, #2
; CHECK-NONEON-NEXT:    and x9, x9, #0x3333333333333333
; CHECK-NONEON-NEXT:    and x10, x10, #0x3333333333333333
; CHECK-NONEON-NEXT:    add x9, x9, x10
; CHECK-NONEON-NEXT:    add x9, x9, x9, lsr #4
; CHECK-NONEON-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
; CHECK-NONEON-NEXT:    mul x8, x9, x8
; CHECK-NONEON-NEXT:    lsr x0, x8, #56
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: cnt64:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt x0, x0
; CHECK-CSSC-NEXT:    ret
  %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
  ret i64 %cnt
}

; Tests `ctpop(x) == 1` on i64, returned zero-extended to i32.
;  * default and no-NEON runs: no population count is expected at all; the
;    expectations are sub #1 / eor / cmp / cset with condition `hi`.
;  * +cssc run: `cnt` followed by a compare against #1 and cset `eq`.
define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_eq_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub x8, x0, #1
; CHECK-NEXT:    eor x9, x0, x8
; CHECK-NEXT:    cmp x9, x8
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop_eq_one:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    sub x8, x0, #1
; CHECK-NONEON-NEXT:    eor x9, x0, x8
; CHECK-NONEON-NEXT:    cmp x9, x8
; CHECK-NONEON-NEXT:    cset w0, hi
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: ctpop_eq_one:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt x8, x0
; CHECK-CSSC-NEXT:    cmp x8, #1
; CHECK-CSSC-NEXT:    cset w0, eq
; CHECK-CSSC-NEXT:    ret
  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
  %cmp = icmp eq i64 %count, 1
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Tests `ctpop(x) != 1` on i64, returned zero-extended to i32. Mirrors
; ctpop_eq_one with the inverted condition: `ls` in the default and no-NEON
; runs, `ne` in the +cssc run.
define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_ne_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub x8, x0, #1
; CHECK-NEXT:    eor x9, x0, x8
; CHECK-NEXT:    cmp x9, x8
; CHECK-NEXT:    cset w0, ls
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop_ne_one:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    sub x8, x0, #1
; CHECK-NONEON-NEXT:    eor x9, x0, x8
; CHECK-NONEON-NEXT:    cmp x9, x8
; CHECK-NONEON-NEXT:    cset w0, ls
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: ctpop_ne_one:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt x8, x0
; CHECK-CSSC-NEXT:    cmp x8, #1
; CHECK-CSSC-NEXT:    cset w0, ne
; CHECK-CSSC-NEXT:    ret
  %count = tail call i64 @llvm.ctpop.i64(i64 %x)
  %cmp = icmp ne i64 %count, 1
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; Tests `ctpop(x) != 1` on i32, returning the i1 comparison result directly.
; Same expected shapes as ctpop_ne_one, using the w registers.
define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32_ne_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub w8, w0, #1
; CHECK-NEXT:    eor w9, w0, w8
; CHECK-NEXT:    cmp w9, w8
; CHECK-NEXT:    cset w0, ls
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop32_ne_one:
; CHECK-NONEON:       // %bb.0:
; CHECK-NONEON-NEXT:    sub w8, w0, #1
; CHECK-NONEON-NEXT:    eor w9, w0, w8
; CHECK-NONEON-NEXT:    cmp w9, w8
; CHECK-NONEON-NEXT:    cset w0, ls
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: ctpop32_ne_one:
; CHECK-CSSC:       // %bb.0:
; CHECK-CSSC-NEXT:    cnt w8, w0
; CHECK-CSSC-NEXT:    cmp w8, #1
; CHECK-CSSC-NEXT:    cset w0, ne
; CHECK-CSSC-NEXT:    ret
  %count = tail call i32 @llvm.ctpop.i32(i32 %x)
  %cmp = icmp ne i32 %count, 1
  ret i1 %cmp
}

; Tests `ctpop(x) == 1` on i32 where the call carries a range(i32 1, 33) return
; attribute, i.e. the count is declared to be at least 1. All three runs,
; including +cssc, expect the shorter sub #1 / tst / cset `eq` sequence and no
; population-count instruction.
define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; CHECK-LABEL: ctpop32_eq_one_nonzero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub w8, w0, #1
; CHECK-NEXT:    tst w0, w8
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero:
; CHECK-NONEON:       // %bb.0: // %entry
; CHECK-NONEON-NEXT:    sub w8, w0, #1
; CHECK-NONEON-NEXT:    tst w0, w8
; CHECK-NONEON-NEXT:    cset w0, eq
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero:
; CHECK-CSSC:       // %bb.0: // %entry
; CHECK-CSSC-NEXT:    sub w8, w0, #1
; CHECK-CSSC-NEXT:    tst w0, w8
; CHECK-CSSC-NEXT:    cset w0, eq
; CHECK-CSSC-NEXT:    ret
entry:
  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
  %cmp = icmp eq i32 %popcnt, 1
  ret i1 %cmp
}

; Tests `ctpop(x) != 1` on i32 with the same range(i32 1, 33) return attribute.
; Mirrors ctpop32_eq_one_nonzero with cset `ne` in all three runs.
define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; CHECK-LABEL: ctpop32_ne_one_nonzero:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub w8, w0, #1
; CHECK-NEXT:    tst w0, w8
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
;
; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero:
; CHECK-NONEON:       // %bb.0: // %entry
; CHECK-NONEON-NEXT:    sub w8, w0, #1
; CHECK-NONEON-NEXT:    tst w0, w8
; CHECK-NONEON-NEXT:    cset w0, ne
; CHECK-NONEON-NEXT:    ret
;
; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero:
; CHECK-CSSC:       // %bb.0: // %entry
; CHECK-CSSC-NEXT:    sub w8, w0, #1
; CHECK-CSSC-NEXT:    tst w0, w8
; CHECK-CSSC-NEXT:    cset w0, ne
; CHECK-CSSC-NEXT:    ret
entry:
  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
  %cmp = icmp ne i32 %popcnt, 1
  ret i1 %cmp
}

; Declarations of the population-count intrinsics exercised above.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone