; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Checks PowerPC code generation for llvm.ctpop combined with a zero-extend,
; in both orders:
;   * zpop_* functions zero-extend the argument first, then count bits at the
;     wider type.
;   * popz_* functions count bits at the narrow type, then zero-extend the
;     result.
;   * popa_* counts bits, zero-extends, and then masks the result with 'and'.
; The FAST prefix covers the runs with -mattr=+popcntd and the SLOW prefix
; covers the runs with -mattr=+slow-popcntd; each is exercised on a generic
; powerpc64 triple and on powerpc64-ibm-aix-xcoff.
;
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-- -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix-xcoff -mattr=+popcntd < %s | FileCheck %s --check-prefix=FAST
; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-ibm-aix-xcoff -mattr=+slow-popcntd < %s | FileCheck %s --check-prefix=SLOW

; zext i8 -> i16, then ctpop.i16.
define i16 @zpop_i8_i16(i8 %x) {
; FAST-LABEL: zpop_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 4, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    sub 3, 4, 3
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    blr
  %z = zext i8 %x to i16
  %pop = tail call i16 @llvm.ctpop.i16(i16 %z)
  ret i16 %pop
}

; ctpop.i8, then zext i8 -> i16.
define i16 @popz_i8_i16(i8 %x) {
; FAST-LABEL: popz_i8_i16:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_i16:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 85
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
; SLOW-NEXT:    andi. 3, 3, 51
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i16
  ret i16 %z
}

; zext i8 -> i32, then ctpop.i32.
define i32 @zpop_i8_i32(i8 %x) {
; FAST-LABEL: zpop_i8_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 24
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i8_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 24
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 85
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i8 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}

; ctpop.i8, then zext i8 -> i32.
define i32 @popz_i8_32(i8 %x) {
; FAST-LABEL: popz_i8_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 56
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i8_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 85
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 30, 30, 31
; SLOW-NEXT:    rlwimi 4, 3, 30, 26, 27
; SLOW-NEXT:    andi. 3, 3, 51
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    blr
  %pop = tail call i8 @llvm.ctpop.i8(i8 %x)
  %z = zext i8 %pop to i32
  ret i32 %z
}

; zext i16 -> i32, then ctpop.i32.
define i32 @zpop_i16_i32(i16 %x) {
; FAST-LABEL: zpop_i16_i32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrlwi 3, 3, 16
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i16_i32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    clrlwi 5, 3, 16
; SLOW-NEXT:    rotlwi 3, 3, 31
; SLOW-NEXT:    andi. 3, 3, 21845
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 5, 3
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    blr
  %z = zext i16 %x to i32
  %pop = tail call i32 @llvm.ctpop.i32(i32 %z)
  ret i32 %pop
}

; ctpop.i16, then zext i16 -> i32.
define i32 @popz_i16_32(i16 %x) {
; FAST-LABEL: popz_i16_32:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i16_32:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 21845
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    clrldi 3, 3, 32
; SLOW-NEXT:    blr
  %pop = tail call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i32
  ret i32 %z
}

; zext i32 -> i64, then ctpop.i64.
define i64 @zpop_i32_i64(i32 %x) {
; FAST-LABEL: zpop_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    blr
;
; SLOW-LABEL: zpop_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    clrldi 3, 3, 32
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    and 5, 3, 4
; SLOW-NEXT:    rotldi 3, 3, 62
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    add 3, 5, 3
; SLOW-NEXT:    lis 4, 3855
; SLOW-NEXT:    rldicl 5, 3, 60, 4
; SLOW-NEXT:    ori 4, 4, 3855
; SLOW-NEXT:    add 3, 3, 5
; SLOW-NEXT:    lis 5, 257
; SLOW-NEXT:    rldimi 4, 4, 32, 0
; SLOW-NEXT:    ori 5, 5, 257
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    rldimi 5, 5, 32, 0
; SLOW-NEXT:    mulld 3, 3, 5
; SLOW-NEXT:    rldicl 3, 3, 8, 56
; SLOW-NEXT:    blr
  %z = zext i32 %x to i64
  %pop = tail call i64 @llvm.ctpop.i64(i64 %z)
  ret i64 %pop
}

; ctpop.i32, then zext i32 -> i64.
define i64 @popz_i32_i64(i32 %x) {
; FAST-LABEL: popz_i32_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    popcntw 3, 3
; FAST-NEXT:    clrldi 3, 3, 32
; FAST-NEXT:    blr
;
; SLOW-LABEL: popz_i32_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 5, 3, 31
; SLOW-NEXT:    andis. 6, 5, 21845
; SLOW-NEXT:    andi. 5, 5, 21845
; SLOW-NEXT:    or 5, 5, 6
; SLOW-NEXT:    lis 4, 13107
; SLOW-NEXT:    sub 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 13107
; SLOW-NEXT:    rotlwi 5, 3, 30
; SLOW-NEXT:    and 3, 3, 4
; SLOW-NEXT:    andis. 4, 5, 13107
; SLOW-NEXT:    andi. 5, 5, 13107
; SLOW-NEXT:    or 4, 5, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 5, 3855
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    lis 4, 257
; SLOW-NEXT:    ori 5, 5, 3855
; SLOW-NEXT:    and 3, 3, 5
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 8, 24, 31
; SLOW-NEXT:    blr
  %pop = tail call i32 @llvm.ctpop.i32(i32 %x)
  %z = zext i32 %pop to i64
  ret i64 %z
}

; ctpop.i16, zext i16 -> i64, then mask the result with 16.
define i64 @popa_i16_i64(i16 %x) {
; FAST-LABEL: popa_i16_i64:
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
; FAST-NEXT:    blr
;
; SLOW-LABEL: popa_i16_i64:
; SLOW:       # %bb.0:
; SLOW-NEXT:    rotlwi 4, 3, 31
; SLOW-NEXT:    andi. 4, 4, 21845
; SLOW-NEXT:    sub 3, 3, 4
; SLOW-NEXT:    andi. 4, 3, 13107
; SLOW-NEXT:    rotlwi 3, 3, 30
; SLOW-NEXT:    andi. 3, 3, 13107
; SLOW-NEXT:    add 3, 4, 3
; SLOW-NEXT:    srwi 4, 3, 4
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 4, 3, 24, 28, 31
; SLOW-NEXT:    clrlwi 3, 3, 28
; SLOW-NEXT:    add 3, 3, 4
; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT:    blr
  %pop = call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
  %a = and i64 %z, 16
  ret i64 %a
}

; Population-count intrinsics used by the functions above.
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone