; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,RV32I
; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefixes=CHECK,RV32ZBB

declare i32 @llvm.ctlz.i32(i32, i1)

define i32 @ctlz_i32(i32 %a) nounwind {
; RV32I-LABEL: ctlz_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB0_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: addi a1, a2, 1365
; RV32I-NEXT: srli a2, a0, 2
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a0, 4
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a0, 8
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: srli a2, a0, 16
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: lui a2, 209715
; RV32I-NEXT: addi a2, a2, 819
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, a2
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: lui a2, 61681
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: addi a1, a2, -241
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB0_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: clz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}

declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV32I-LABEL: ctlz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: lui a3, 209715
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a2, 1365
; RV32I-NEXT: addi a3, a3, 819
; RV32I-NEXT: addi a2, a5, -241
; RV32I-NEXT: bnez a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: srli a0, a1, 1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: srli a1, a0, 2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: srli a1, a0, 16
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: not a0, a0
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, a3
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctlz_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bnez a1, .LBB1_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: clz a0, a0
; RV32ZBB-NEXT: addi a0, a0, 32
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB1_2:
; RV32ZBB-NEXT: clz a0, a1
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}

declare i32 @llvm.cttz.i32(i32, i1)

define i32 @cttz_i32(i32 %a) nounwind {
; RV32I-LABEL: cttz_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a0, .LBB2_2
; RV32I-NEXT: # %bb.1: # %cond.false
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: neg a1, a0
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi a1, a1, 1329
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: lui a1, %hi(.LCPI2_0)
; RV32I-NEXT: addi a1, a1, %lo(.LCPI2_0)
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB2_2:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: cttz_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: ctz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}

declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-LABEL: cttz_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: neg a0, a0
; RV32I-NEXT: and a0, s0, a0
; RV32I-NEXT: lui a1, 30667
; RV32I-NEXT: addi s3, a1, 1329
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: bnez s2, .LBB3_3
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: li a0, 32
; RV32I-NEXT: beqz s0, .LBB3_4
; RV32I-NEXT: .LBB3_2:
; RV32I-NEXT: srli s1, s1, 27
; RV32I-NEXT: add s1, s4, s1
; RV32I-NEXT: lbu a0, 0(s1)
; RV32I-NEXT: j .LBB3_5
; RV32I-NEXT: .LBB3_3:
; RV32I-NEXT: srli a0, a0, 27
; RV32I-NEXT: add a0, s4, a0
; RV32I-NEXT: lbu a0, 0(a0)
; RV32I-NEXT: bnez s0, .LBB3_2
; RV32I-NEXT: .LBB3_4:
; RV32I-NEXT: addi a0, a0, 32
; RV32I-NEXT: .LBB3_5:
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: cttz_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: bnez a0, .LBB3_2
; RV32ZBB-NEXT: # %bb.1:
; RV32ZBB-NEXT: ctz a0, a1
; RV32ZBB-NEXT: addi a0, a0, 32
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
; RV32ZBB-NEXT: .LBB3_2:
; RV32ZBB-NEXT: ctz a0, a0
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}

declare i32 @llvm.ctpop.i32(i32)

define i32 @ctpop_i32(i32 %a) nounwind {
; RV32I-LABEL: ctpop_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a1, a0, 1
; RV32I-NEXT: lui a2, 349525
; RV32I-NEXT: addi a2, a2, 1365
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: lui a2, 209715
; RV32I-NEXT: addi a2, a2, 819
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: and a1, a0, a2
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: lui a2, 61681
; RV32I-NEXT: add a0, a1, a0
; RV32I-NEXT: srli a1, a0, 4
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: addi a1, a2, -241
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  ret i32 %1
}

define i1 @ctpop_i32_ult_two(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_ult_two:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_ult_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ult i32 %1, 2
  ret i1 %2
}

define i1 @ctpop_i32_ugt_one(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ugt i32 %1, 1
  ret i1 %2
}

define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_eq_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: seqz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp eq i32 %1, 1
  ret i1 %2
}

define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_ne_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: snez a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i32 @llvm.ctpop.i32(i32 %a)
  %2 = icmp ne i32 %1, 1
  ret i1 %2
}

declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)

define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a2, a0, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
; RV32I-NEXT: srli a5, a1, 1
; RV32I-NEXT: addi a3, a3, 1365
; RV32I-NEXT: and a2, a2, a3
; RV32I-NEXT: and a3, a5, a3
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a4, 819
; RV32I-NEXT: addi a5, a5, -241
; RV32I-NEXT: sub a0, a0, a2
; RV32I-NEXT: sub a1, a1, a3
; RV32I-NEXT: and a2, a0, a4
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a3, a1, a4
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: and a0, a0, a4
; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: add a0, a2, a0
; RV32I-NEXT: add a1, a3, a1
; RV32I-NEXT: srli a2, a0, 4
; RV32I-NEXT: srli a3, a1, 4
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: add a1, a1, a3
; RV32I-NEXT: and a0, a0, a5
; RV32I-NEXT: and a1, a1, a5
; RV32I-NEXT: slli a2, a0, 8
; RV32I-NEXT: slli a3, a1, 8
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: add a1, a1, a3
; RV32I-NEXT: slli a2, a0, 16
; RV32I-NEXT: slli a3, a1, 16
; RV32I-NEXT: add a0, a0, a2
; RV32I-NEXT: add a1, a1, a3
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: srli a1, a1, 24
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  ret <2 x i32> %1
}

define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_ult_two:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: sltiu a1, a1, 2
; RV32ZBB-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ult <2 x i32> %1, <i32 2, i32 2>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: sltiu a1, a1, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: xori a1, a1, 1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ugt <2 x i32> %1, <i32 1, i32 1>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: addi a1, a1, -1
; RV32ZBB-NEXT: seqz a0, a0
; RV32ZBB-NEXT: seqz a1, a1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp eq <2 x i32> %1, <i32 1, i32 1>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: addi a1, a1, -1
; RV32ZBB-NEXT: snez a0, a0
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ne <2 x i32> %1, <i32 1, i32 1>
  ret <2 x i1> %2
}

declare i64 @llvm.ctpop.i64(i64)

define i64 @ctpop_i64(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a2, a1, 1
; RV32I-NEXT: lui a3, 349525
; RV32I-NEXT: lui a4, 209715
; RV32I-NEXT: srli a5, a0, 1
; RV32I-NEXT: addi a3, a3, 1365
; RV32I-NEXT: and a2, a2, a3
; RV32I-NEXT: and a3, a5, a3
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a4, a4, 819
; RV32I-NEXT: addi a5, a5, -241
; RV32I-NEXT: sub a1, a1, a2
; RV32I-NEXT: sub a0, a0, a3
; RV32I-NEXT: and a2, a1, a4
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: and a3, a0, a4
; RV32I-NEXT: srli a0, a0, 2
; RV32I-NEXT: and a1, a1, a4
; RV32I-NEXT: and a0, a0, a4
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: add a0, a3, a0
; RV32I-NEXT: srli a2, a1, 4
; RV32I-NEXT: srli a3, a0, 4
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a0, a0, a3
; RV32I-NEXT: and a1, a1, a5
; RV32I-NEXT: and a0, a0, a5
; RV32I-NEXT: slli a2, a1, 8
; RV32I-NEXT: slli a3, a0, 8
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a0, a0, a3
; RV32I-NEXT: slli a2, a1, 16
; RV32I-NEXT: slli a3, a0, 16
; RV32I-NEXT: add a1, a1, a2
; RV32I-NEXT: add a0, a0, a3
; RV32I-NEXT: srli a1, a1, 24
; RV32I-NEXT: srli a0, a0, 24
; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %1
}

define i1 @ctpop_i64_ugt_two(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_two:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: or a0, a2, a0
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_ugt_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ult i64 %1, 2
  ret i1 %2
}

define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: and a0, a1, a0
; RV32I-NEXT: or a0, a2, a0
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ugt i64 %1, 1
  ret i1 %2
}

define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a1, .LBB17_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB17_2:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_eq_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: seqz a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp eq i64 %1, 1
  ret i1 %2
}

define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: beqz a1, .LBB18_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB18_2:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_ne_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a0, a0, a1
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: snez a0, a0
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.ctpop.i64(i64 %a)
  %2 = icmp ne i64 %1, 1
  ret i1 %2
}

declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)

define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: lw a4, 4(a1)
; RV32I-NEXT: lw a2, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: lui a5, 349525
; RV32I-NEXT: addi a5, a5, 1365
; RV32I-NEXT: srli a6, a4, 1
; RV32I-NEXT: srli a7, a3, 1
; RV32I-NEXT: srli t0, a1, 1
; RV32I-NEXT: srli t1, a2, 1
; RV32I-NEXT: and a6, a6, a5
; RV32I-NEXT: and a7, a7, a5
; RV32I-NEXT: and t0, t0, a5
; RV32I-NEXT: and a5, t1, a5
; RV32I-NEXT: lui t1, 209715
; RV32I-NEXT: addi t1, t1, 819
; RV32I-NEXT: sub a4, a4, a6
; RV32I-NEXT: sub a3, a3, a7
; RV32I-NEXT: sub a1, a1, t0
; RV32I-NEXT: sub a2, a2, a5
; RV32I-NEXT: and a5, a4, t1
; RV32I-NEXT: srli a4, a4, 2
; RV32I-NEXT: and a6, a3, t1
; RV32I-NEXT: srli a3, a3, 2
; RV32I-NEXT: and a7, a1, t1
; RV32I-NEXT: srli a1, a1, 2
; RV32I-NEXT: and t0, a2, t1
; RV32I-NEXT: srli a2, a2, 2
; RV32I-NEXT: and a4, a4, t1
; RV32I-NEXT: and a3, a3, t1
; RV32I-NEXT: and a1, a1, t1
; RV32I-NEXT: and a2, a2, t1
; RV32I-NEXT: add a4, a5, a4
; RV32I-NEXT: lui a5, 61681
; RV32I-NEXT: addi a5, a5, -241
; RV32I-NEXT: add a3, a6, a3
; RV32I-NEXT: add a1, a7, a1
; RV32I-NEXT: add a2, t0, a2
; RV32I-NEXT: srli a6, a4, 4
; RV32I-NEXT: srli a7, a3, 4
; RV32I-NEXT: srli t0, a1, 4
; RV32I-NEXT: add a4, a4, a6
; RV32I-NEXT: srli a6, a2, 4
; RV32I-NEXT: add a3, a3, a7
; RV32I-NEXT: add a1, a1, t0
; RV32I-NEXT: add a2, a2, a6
; RV32I-NEXT: and a4, a4, a5
; RV32I-NEXT: and a3, a3, a5
; RV32I-NEXT: and a1, a1, a5
; RV32I-NEXT: and a2, a2, a5
; RV32I-NEXT: slli a5, a4, 8
; RV32I-NEXT: slli a6, a3, 8
; RV32I-NEXT: slli a7, a1, 8
; RV32I-NEXT: slli t0, a2, 8
; RV32I-NEXT: add a4, a4, a5
; RV32I-NEXT: add a3, a3, a6
; RV32I-NEXT: add a1, a1, a7
; RV32I-NEXT: add a2, a2, t0
; RV32I-NEXT: slli a5, a4, 16
; RV32I-NEXT: slli a6, a3, 16
; RV32I-NEXT: slli a7, a1, 16
; RV32I-NEXT: slli t0, a2, 16
; RV32I-NEXT: add a4, a4, a5
; RV32I-NEXT: add a3, a3, a6
; RV32I-NEXT: add a1, a1, a7
; RV32I-NEXT: add a2, a2, t0
; RV32I-NEXT: srli a4, a4, 24
; RV32I-NEXT: srli a3, a3, 24
; RV32I-NEXT: srli a1, a1, 24
; RV32I-NEXT: srli a2, a2, 24
; RV32I-NEXT: add a3, a3, a4
; RV32I-NEXT: add a1, a2, a1
; RV32I-NEXT: sw a3, 0(a0)
; RV32I-NEXT: sw zero, 4(a0)
; RV32I-NEXT: sw a1, 8(a0)
; RV32I-NEXT: sw zero, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a2, 4(a1)
; RV32ZBB-NEXT: lw a3, 0(a1)
; RV32ZBB-NEXT: lw a4, 12(a1)
; RV32ZBB-NEXT: lw a1, 8(a1)
; RV32ZBB-NEXT: cpop a2, a2
; RV32ZBB-NEXT: cpop a3, a3
; RV32ZBB-NEXT: cpop a4, a4
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: add a2, a3, a2
; RV32ZBB-NEXT: add a1, a1, a4
; RV32ZBB-NEXT: sw a2, 0(a0)
; RV32ZBB-NEXT: sw zero, 4(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
; RV32ZBB-NEXT: sw zero, 12(a0)
; RV32ZBB-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  ret <2 x i64> %1
}

define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ult_two:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
; RV32I-NEXT: lw a2, 4(a0)
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: addi a4, a1, -1
; RV32I-NEXT: and a4, a1, a4
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: addi a2, a3, -1
; RV32I-NEXT: and a2, a3, a2
; RV32I-NEXT: seqz a3, a3
; RV32I-NEXT: sub a3, a0, a3
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: or a1, a4, a1
; RV32I-NEXT: or a2, a2, a0
; RV32I-NEXT: seqz a0, a1
; RV32I-NEXT: seqz a1, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a1, 12(a0)
; RV32ZBB-NEXT: lw a2, 8(a0)
; RV32ZBB-NEXT: lw a3, 4(a0)
; RV32ZBB-NEXT: lw a0, 0(a0)
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a2, a2
; RV32ZBB-NEXT: cpop a3, a3
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: add a0, a0, a3
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: sltiu a1, a1, 2
; RV32ZBB-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ugt_one:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
; RV32I-NEXT: lw a2, 4(a0)
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: addi a4, a1, -1
; RV32I-NEXT: and a4, a1, a4
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: addi a2, a3, -1
; RV32I-NEXT: and a2, a3, a2
; RV32I-NEXT: seqz a3, a3
; RV32I-NEXT: sub a3, a0, a3
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: or a1, a4, a1
; RV32I-NEXT: or a2, a2, a0
; RV32I-NEXT: snez a0, a1
; RV32I-NEXT: snez a1, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a1, 12(a0)
; RV32ZBB-NEXT: lw a2, 8(a0)
; RV32ZBB-NEXT: lw a3, 4(a0)
; RV32ZBB-NEXT: lw a0, 0(a0)
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a2, a2
; RV32ZBB-NEXT: cpop a3, a3
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: add a0, a0, a3
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: sltiu a1, a1, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: xori a1, a1, 1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: lw a3, 4(a1)
; RV32I-NEXT: lw a2, 12(a1)
; RV32I-NEXT: beqz a3, .LBB22_3
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a3, a0
; RV32I-NEXT: xor a3, a3, a0
; RV32I-NEXT: sltu a0, a0, a3
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bnez a2, .LBB22_4
; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB22_3:
; RV32I-NEXT: addi a3, a0, -1
; RV32I-NEXT: xor a0, a0, a3
; RV32I-NEXT: sltu a0, a3, a0
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: beqz a2, .LBB22_2
; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: xor a2, a2, a1
; RV32I-NEXT: sltu a1, a1, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a1, 12(a0)
; RV32ZBB-NEXT: lw a2, 8(a0)
; RV32ZBB-NEXT: lw a3, 4(a0)
; RV32ZBB-NEXT: lw a0, 0(a0)
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a2, a2
; RV32ZBB-NEXT: cpop a3, a3
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: add a0, a0, a3
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: addi a1, a1, -1
; RV32ZBB-NEXT: seqz a0, a0
; RV32ZBB-NEXT: seqz a1, a1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
  ret <2 x i1> %2
}

define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a2, 0(a0)
; RV32I-NEXT: lw a3, 4(a0)
; RV32I-NEXT: lw a1, 12(a0)
; RV32I-NEXT: beqz a3, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: xor a3, a3, a2
; RV32I-NEXT: sltu a2, a2, a3
; RV32I-NEXT: j .LBB23_3
; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: addi a3, a2, -1
; RV32I-NEXT: xor a2, a2, a3
; RV32I-NEXT: sltu a2, a3, a2
; RV32I-NEXT: .LBB23_3:
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: xori a0, a2, 1
; RV32I-NEXT: beqz a1, .LBB23_5
; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: seqz a2, a3
; RV32I-NEXT: sub a2, a1, a2
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB23_5:
; RV32I-NEXT: addi a1, a3, -1
; RV32I-NEXT: xor a3, a3, a1
; RV32I-NEXT: sltu a1, a1, a3
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lw a1, 12(a0)
; RV32ZBB-NEXT: lw a2, 8(a0)
; RV32ZBB-NEXT: lw a3, 4(a0)
; RV32ZBB-NEXT: lw a0, 0(a0)
; RV32ZBB-NEXT: cpop a1, a1
; RV32ZBB-NEXT: cpop a2, a2
; RV32ZBB-NEXT: cpop a3, a3
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: add a1, a2, a1
; RV32ZBB-NEXT: add a0, a0, a3
; RV32ZBB-NEXT: addi a0, a0, -1
; RV32ZBB-NEXT: addi a1, a1, -1
; RV32ZBB-NEXT: snez a0, a0
; RV32ZBB-NEXT: snez a1, a1
; RV32ZBB-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ne <2 x i64> %1, <i64 1, i64 1>
  ret <2 x i1> %2
}

define i32 @sextb_i32(i32 %a) nounwind {
; RV32I-LABEL: sextb_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: srai a0, a0, 24
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: sextb_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}

define i64 @sextb_i64(i64 %a) nounwind {
; RV32I-LABEL: sextb_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 24
; RV32I-NEXT: srai a0, a1, 24
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: sextb_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.b a0, a0
; RV32ZBB-NEXT: srai a1, a0, 31
; RV32ZBB-NEXT: ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}

define i32 @sexth_i32(i32 %a) nounwind {
; RV32I-LABEL: sexth_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srai a0, a0, 16
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: sexth_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define i64 @sexth_i64(i64 %a) nounwind {
; RV32I-LABEL: sexth_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a1, a0, 16
; RV32I-NEXT: srai a0, a1, 16
; RV32I-NEXT: srai a1, a1, 31
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: sexth_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: sext.h a0, a0
; RV32ZBB-NEXT: srai a1, a0, 31
; RV32ZBB-NEXT: ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i32 @min_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: min_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a0, a1, .LBB28_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB28_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: min_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: min a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp slt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

define i64 @min_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: min_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: beq a1, a3, .LBB29_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slt a4, a1, a3
; CHECK-NEXT: beqz a4, .LBB29_3
; CHECK-NEXT: j .LBB29_4
; CHECK-NEXT: .LBB29_2:
; CHECK-NEXT: sltu a4, a0, a2
; CHECK-NEXT: bnez a4, .LBB29_4
; CHECK-NEXT: .LBB29_3:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB29_4:
; CHECK-NEXT: ret
  %cmp = icmp slt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @max_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: max_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: blt a1, a0, .LBB30_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB30_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: max_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp sgt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

define i64 @max_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: max_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: beq a1, a3, .LBB31_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: slt a4, a3, a1
; CHECK-NEXT: beqz a4, .LBB31_3
; CHECK-NEXT: j .LBB31_4
; CHECK-NEXT: .LBB31_2:
; CHECK-NEXT: sltu a4, a2, a0
; CHECK-NEXT: bnez a4, .LBB31_4
; CHECK-NEXT: .LBB31_3:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB31_4:
; CHECK-NEXT: ret
  %cmp = icmp sgt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @minu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: minu_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: bltu a0, a1, .LBB32_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB32_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: minu_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: minu a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp ult i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

define i64 @minu_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: minu_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: beq a1, a3, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: sltu a4, a1, a3
; CHECK-NEXT: beqz a4, .LBB33_3
; CHECK-NEXT: j .LBB33_4
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: sltu a4, a0, a2
; CHECK-NEXT: bnez a4, .LBB33_4
; CHECK-NEXT: .LBB33_3:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: ret
  %cmp = icmp ult i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

define i32 @maxu_i32(i32 %a, i32 %b) nounwind {
; RV32I-LABEL: maxu_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: bltu a1, a0, .LBB34_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: .LBB34_2:
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: maxu_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: maxu a0, a0, a1
; RV32ZBB-NEXT: ret
  %cmp = icmp ugt i32 %a, %b
  %cond = select i1 %cmp, i32 %a, i32 %b
  ret i32 %cond
}

; As we are not matching directly i64 code patterns on RV32 some i64 patterns
; don't have yet any matching bit manipulation instructions on RV32.
; This test is presented here in case future expansions of the Bitmanip
; extensions introduce instructions suitable for this pattern.

define i64 @maxu_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: maxu_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: beq a1, a3, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: sltu a4, a3, a1
; CHECK-NEXT: beqz a4, .LBB35_3
; CHECK-NEXT: j .LBB35_4
; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: sltu a4, a2, a0
; CHECK-NEXT: bnez a4, .LBB35_4
; CHECK-NEXT: .LBB35_3:
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB35_4:
; CHECK-NEXT: ret
  %cmp = icmp ugt i64 %a, %b
  %cond = select i1 %cmp, i64 %a, i64 %b
  ret i64 %cond
}

declare i32 @llvm.abs.i32(i32, i1 immarg)

define i32 @abs_i32(i32 %x) {
; RV32I-LABEL: abs_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: neg a1, a0
; RV32ZBB-NEXT: max a0, a0, a1
; RV32ZBB-NEXT: ret
  %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true)
  ret i32 %abs
}

declare i64 @llvm.abs.i64(i64, i1 immarg)

define i64 @abs_i64(i64 %x) {
; CHECK-LABEL: abs_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: bgez a1, .LBB37_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: snez a2, a0
; CHECK-NEXT: neg a0, a0
; CHECK-NEXT: neg a1, a1
; CHECK-NEXT: sub a1, a1, a2
; CHECK-NEXT: .LBB37_2:
; CHECK-NEXT: ret
  %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
  ret i64 %abs
}

define i32 @zexth_i32(i32 %a) nounwind {
; RV32I-LABEL: zexth_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: zexth_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV32I-LABEL: zexth_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: slli a0, a0, 16
; RV32I-NEXT: srli a0, a0, 16
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: zexth_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: zext.h a0, a0
; RV32ZBB-NEXT: li a1, 0
; RV32ZBB-NEXT: ret
  %and = and i64 %a, 65535
  ret i64 %and
}

declare i32 @llvm.bswap.i32(i32)

define i32 @bswap_i32(i32 %a) nounwind {
; RV32I-LABEL: bswap_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a1, a0, 8
; RV32I-NEXT: lui a2, 16
; RV32I-NEXT: srli a3, a0, 24
; RV32I-NEXT: addi a2, a2, -256
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: or a1, a1, a3
; RV32I-NEXT: slli a2, a2, 8
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: bswap_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: rev8 a0, a0
; RV32ZBB-NEXT: ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV32I-LABEL: bswap_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: srli a2, a1, 8
; RV32I-NEXT: lui a3, 16
; RV32I-NEXT: srli a4, a1, 24
; RV32I-NEXT: srli a5, a0, 8
; RV32I-NEXT: addi a3, a3, -256
; RV32I-NEXT: and a2, a2, a3
; RV32I-NEXT: or a2, a2, a4
; RV32I-NEXT: srli a4, a0, 24
; RV32I-NEXT: and a5, a5, a3
; RV32I-NEXT: or a4, a5, a4
; RV32I-NEXT: slli a5, a1, 24
; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: slli a1, a1, 8
; RV32I-NEXT: or a1, a5, a1
; RV32I-NEXT: and a3, a0, a3
; RV32I-NEXT: slli a0, a0, 24
; RV32I-NEXT: slli a3, a3, 8
; RV32I-NEXT: or a3, a0, a3
; RV32I-NEXT: or a0, a1, a2
; RV32I-NEXT: or a1, a3, a4
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: bswap_i64:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: rev8 a2, a1
; RV32ZBB-NEXT: rev8 a1, a0
; RV32ZBB-NEXT: mv a0, a2
; RV32ZBB-NEXT: ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}

define i16 @orc_b_i16(i16 %a) {
; RV32I-LABEL: orc_b_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 257
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: orc_b_i16:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: andi a0, a0, 257
; RV32ZBB-NEXT: orc.b a0, a0
; RV32ZBB-NEXT: ret
  %1 = and i16 %a, 257
  %2 = mul nuw i16 %1, 255
  ret i16 %2
}

define i32 @orc_b_i32(i32 %a) {
; RV32I-LABEL: orc_b_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a1, 4112
; RV32I-NEXT: addi a1, a1, 257
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: slli a1, a0, 8
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: orc_b_i32:
; RV32ZBB: # %bb.0:
; RV32ZBB-NEXT: lui a1, 4112
; RV32ZBB-NEXT: addi a1, a1, 257
; RV32ZBB-NEXT: and a0, a0, a1
; RV32ZBB-NEXT: orc.b a0, a0
; RV32ZBB-NEXT: ret
  %1 = and i32 %a, 16843009
  %2 = mul nuw i32 %1, 255
  ret i32 %2
}

define i64 @orc_b_i64(i64 %a) {
; CHECK-LABEL: orc_b_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a2, 4112
; CHECK-NEXT: addi a2, a2, 257
; CHECK-NEXT: and a1, a1, a2
; CHECK-NEXT: and a0, a0, a2
; CHECK-NEXT: slli a2, a0, 8
; CHECK-NEXT: srli a3, a0, 24
; CHECK-NEXT: slli a4, a1, 8
; CHECK-NEXT: sltu a5, a2, a0
; CHECK-NEXT: or a3, a4, a3
; CHECK-NEXT: sub a1, a3, a1
; CHECK-NEXT: sub a1, a1, a5
; CHECK-NEXT: sub a0, a2, a0
; CHECK-NEXT: ret
  %1 = and i64 %a, 72340172838076673
  %2 = mul nuw i64 %1, 255
  ret i64 %2
}

define i32 @srai_slli(i16 signext %0) {
; CHECK-LABEL: srai_slli:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 25
; CHECK-NEXT: srai a0, a0, 31
; CHECK-NEXT: ret
  %2 = shl i16 %0, 9
  %sext = ashr i16 %2, 15
  %3 = sext i16 %sext to i32
  ret i32 %3
}

define i32 @srai_slli2(i16 signext %0) {
; CHECK-LABEL: srai_slli2:
; CHECK: # %bb.0:
; CHECK-NEXT: slli a0, a0, 25
; CHECK-NEXT: srai a0, a0, 30
; CHECK-NEXT: ret
  %2 = shl i16 %0, 9
  %sext = ashr i16 %2, 14
  %3 = sext i16 %sext to i32
  ret i32 %3
}

define i1 @ctpop32_eq_one_nonzero(i32 %x) {
; RV32I-LABEL: ctpop32_eq_one_nonzero:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop32_eq_one_nonzero:
; RV32ZBB: # %bb.0: # %entry
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: ret
entry:
  %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
  %cmp = icmp eq i32 %popcnt, 1
  ret i1 %cmp
}

define i1 @ctpop32_ne_one_nonzero(i32 %x) {
; RV32I-LABEL: ctpop32_ne_one_nonzero:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop32_ne_one_nonzero:
; RV32ZBB: # %bb.0: # %entry
; RV32ZBB-NEXT: cpop a0, a0
; RV32ZBB-NEXT: sltiu a0, a0, 2
; RV32ZBB-NEXT: xori a0, a0, 1
; RV32ZBB-NEXT: ret
entry:
  %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x)
  %cmp = icmp ne i32 %popcnt, 1
  ret i1 %cmp
}