; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -mattr=+xtheadbb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64XTHEADBB

; This file checks RV64 lowering of ctlz/cttz/bswap intrinsics and
; sign/zero-extension patterns, both for the base ISA (RV64I) and with the
; T-Head bit-manipulation extension (+xtheadbb: th.ff0/th.ff1, th.ext/th.extu,
; th.rev/th.revw).

; Count-leading-zeros tests: with XTheadBb, ctlz lowers to th.ff0/th.ff1
; instead of the generic shift/popcount expansion.
declare i32 @llvm.ctlz.i32(i32, i1)

define signext i32 @ctlz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ctlz_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB0_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addiw a1, a2, 1365
; RV64I-NEXT:    srliw a2, a0, 2
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 4
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 16
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a2, a0, 1
; RV64I-NEXT:    and a1, a2, a1
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    and a1, a0, a2
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 61681
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, a2, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 8
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 16
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB0_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @log2_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB1_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addiw a1, a2, 1365
; RV64I-NEXT:    srliw a2, a0, 2
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 4
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 16
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a2, a0, 1
; RV64I-NEXT:    and a1, a2, a1
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    and a1, a0, a2
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 61681
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, a2, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 8
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 16
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    j .LBB1_3
; RV64I-NEXT:  .LBB1_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:  .LBB1_3: # %cond.end
; RV64I-NEXT:    li a1, 31
; RV64I-NEXT:    sub a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: log2_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 31
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
  %2 = sub i32 31, %1
  ret i32 %2
}

define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
; RV64I-LABEL: log2_ceil_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addiw a1, a0, -1
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    li a2, 32
; RV64I-NEXT:    beqz a1, .LBB2_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srliw a2, a1, 1
; RV64I-NEXT:    lui a3, 349525
; RV64I-NEXT:    or a1, a1, a2
; RV64I-NEXT:    addiw a2, a3, 1365
; RV64I-NEXT:    srliw a3, a1, 2
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 4
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 8
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 16
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    not a1, a1
; RV64I-NEXT:    srli a3, a1, 1
; RV64I-NEXT:    and a2, a3, a2
; RV64I-NEXT:    lui a3, 209715
; RV64I-NEXT:    addiw a3, a3, 819
; RV64I-NEXT:    sub a1, a1, a2
; RV64I-NEXT:    and a2, a1, a3
; RV64I-NEXT:    srli a1, a1, 2
; RV64I-NEXT:    and a1, a1, a3
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    add a1, a2, a1
; RV64I-NEXT:    srli a2, a1, 4
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    addi a2, a3, -241
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    slli a2, a1, 8
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    slli a2, a1, 16
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    srliw a2, a1, 24
; RV64I-NEXT:  .LBB2_2: # %cond.end
; RV64I-NEXT:    sub a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: log2_ceil_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 32
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = sub i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  %3 = sub i32 32, %2
  ret i32 %3
}

define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findLastSet_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    or a1, a0, a1
; RV64I-NEXT:    addiw a2, a2, 1365
; RV64I-NEXT:    srliw a3, a1, 2
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 4
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 8
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srliw a3, a1, 16
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    not a1, a1
; RV64I-NEXT:    srli a3, a1, 1
; RV64I-NEXT:    and a2, a3, a2
; RV64I-NEXT:    lui a3, 209715
; RV64I-NEXT:    addiw a3, a3, 819
; RV64I-NEXT:    sub a1, a1, a2
; RV64I-NEXT:    and a2, a1, a3
; RV64I-NEXT:    srli a1, a1, 2
; RV64I-NEXT:    and a1, a1, a3
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    snez a0, a0
; RV64I-NEXT:    addi a3, a3, -241
; RV64I-NEXT:    add a1, a2, a1
; RV64I-NEXT:    srli a2, a1, 4
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    and a1, a1, a3
; RV64I-NEXT:    slli a2, a1, 8
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    slli a2, a1, 16
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    srliw a1, a1, 24
; RV64I-NEXT:    xori a1, a1, 31
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: findLastSet_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    not a1, a0
; RV64XTHEADBB-NEXT:    snez a0, a0
; RV64XTHEADBB-NEXT:    slli a1, a1, 32
; RV64XTHEADBB-NEXT:    th.ff0 a1, a1
; RV64XTHEADBB-NEXT:    xori a1, a1, 31
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    or a0, a0, a1
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %2 = xor i32 31, %1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 -1, i32 %2
  ret i32 %4
}

define i32 @ctlz_lshr_i32(i32 signext %a) {
; RV64I-LABEL: ctlz_lshr_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a0, a0, 1
; RV64I-NEXT:    beqz a0, .LBB4_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srliw a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addiw a1, a2, 1365
; RV64I-NEXT:    srliw a2, a0, 2
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 4
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    srliw a2, a0, 16
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a2, a0, 1
; RV64I-NEXT:    and a1, a2, a1
; RV64I-NEXT:    lui a2, 209715
; RV64I-NEXT:    addiw a2, a2, 819
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    and a1, a0, a2
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    lui a2, 61681
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a1, a2, -241
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 8
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 16
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    srliw a0, a0, 24
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB4_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_lshr_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    srliw a0, a0, 1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    slli a0, a0, 32
; RV64XTHEADBB-NEXT:    th.ff0 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i32 %a, 1
  %2 = call i32 @llvm.ctlz.i32(i32 %1, i1 false)
  ret i32 %2
}

declare i64 @llvm.ctlz.i64(i64, i1)

define i64 @ctlz_i64(i64 %a) nounwind {
; RV64I-LABEL: ctlz_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB5_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    srli a1, a0, 1
; RV64I-NEXT:    lui a2, 349525
; RV64I-NEXT:    lui a3, 209715
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addiw a1, a2, 1365
; RV64I-NEXT:    addiw a2, a3, 819
; RV64I-NEXT:    srli a3, a0, 2
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    slli a3, a1, 32
; RV64I-NEXT:    add a1, a1, a3
; RV64I-NEXT:    slli a3, a2, 32
; RV64I-NEXT:    add a2, a2, a3
; RV64I-NEXT:    srli a3, a0, 4
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    srli a3, a0, 8
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    srli a3, a0, 16
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    srli a3, a0, 32
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    srli a3, a0, 1
; RV64I-NEXT:    and a1, a3, a1
; RV64I-NEXT:    lui a3, 61681
; RV64I-NEXT:    addiw a3, a3, -241
; RV64I-NEXT:    sub a0, a0, a1
; RV64I-NEXT:    and a1, a0, a2
; RV64I-NEXT:    srli a0, a0, 2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    slli a2, a3, 32
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    srli a1, a0, 4
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    add a2, a3, a2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    slli a1, a0, 8
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 16
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    slli a1, a0, 32
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    srli a0, a0, 56
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB5_2:
; RV64I-NEXT:    li a0, 64
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ctlz_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
  ret i64 %1
}

; Count-trailing-zeros tests: RV64I uses a de Bruijn multiply + table lookup;
; XTheadBb isolates the lowest set bit and counts with th.ff1.
declare i32 @llvm.cttz.i32(i32, i1)

define signext i32 @cttz_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB6_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI6_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI6_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB6_2:
; RV64I-NEXT:    li a0, 32
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    beqz a0, .LBB6_2
; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
; RV64XTHEADBB-NEXT:  .LBB6_2:
; RV64XTHEADBB-NEXT:    li a0, 32
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  ret i32 %1
}

define signext i32 @cttz_zero_undef_i32(i32 signext %a) nounwind {
; RV64I-LABEL: cttz_zero_undef_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI7_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_zero_undef_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  ret i32 %1
}

define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
; RV64I-LABEL: findFirstSet_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    neg a0, a0
; RV64I-NEXT:    and a0, s0, a0
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI8_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI8_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    snez a1, s0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: findFirstSet_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a2, a0
; RV64XTHEADBB-NEXT:    and a1, a2, a1
; RV64XTHEADBB-NEXT:    li a2, 64
; RV64XTHEADBB-NEXT:    snez a0, a0
; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
; RV64XTHEADBB-NEXT:    sub a2, a2, a1
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    or a0, a0, a2
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = icmp eq i32 %a, 0
  %3 = select i1 %2, i32 -1, i32 %1
  ret i32 %3
}

define signext i32 @ffs_i32(i32 signext %a) nounwind {
; RV64I-LABEL: ffs_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    neg a0, a0
; RV64I-NEXT:    and a0, s0, a0
; RV64I-NEXT:    lui a1, 30667
; RV64I-NEXT:    addiw a1, a1, 1329
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srliw a0, a0, 27
; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI9_0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    seqz a1, s0
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: ffs_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a2, a0
; RV64XTHEADBB-NEXT:    and a1, a2, a1
; RV64XTHEADBB-NEXT:    li a2, 65
; RV64XTHEADBB-NEXT:    seqz a0, a0
; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
; RV64XTHEADBB-NEXT:    sub a2, a2, a1
; RV64XTHEADBB-NEXT:    addi a0, a0, -1
; RV64XTHEADBB-NEXT:    and a0, a0, a2
; RV64XTHEADBB-NEXT:    ret
  %1 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
  %2 = add i32 %1, 1
  %3 = icmp eq i32 %a, 0
  %4 = select i1 %3, i32 0, i32 %2
  ret i32 %4
}

declare i64 @llvm.cttz.i64(i64, i1)

define i64 @cttz_i64(i64 %a) nounwind {
; RV64I-LABEL: cttz_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    beqz a0, .LBB10_2
; RV64I-NEXT:  # %bb.1: # %cond.false
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    neg a1, a0
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    lui a1, %hi(.LCPI10_0)
; RV64I-NEXT:    ld a1, %lo(.LCPI10_0)(a1)
; RV64I-NEXT:    call __muldi3
; RV64I-NEXT:    srli a0, a0, 58
; RV64I-NEXT:    lui a1, %hi(.LCPI10_1)
; RV64I-NEXT:    addi a1, a1, %lo(.LCPI10_1)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB10_2:
; RV64I-NEXT:    li a0, 64
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: cttz_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    beqz a0, .LBB10_2
; RV64XTHEADBB-NEXT:  # %bb.1: # %cond.false
; RV64XTHEADBB-NEXT:    addi a1, a0, -1
; RV64XTHEADBB-NEXT:    not a0, a0
; RV64XTHEADBB-NEXT:    and a0, a0, a1
; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
; RV64XTHEADBB-NEXT:    li a1, 64
; RV64XTHEADBB-NEXT:    sub a0, a1, a0
; RV64XTHEADBB-NEXT:    ret
; RV64XTHEADBB-NEXT:  .LBB10_2:
; RV64XTHEADBB-NEXT:    li a0, 64
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
  ret i64 %1
}

; Sign-extension tests: XTheadBb folds shl+ashr pairs into th.ext when the
; shift amounts describe a sign-extend of the low bits.
define signext i32 @sexti1_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sexti1_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 63
; RV64I-NEXT:    srai a0, a0, 63
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexti1_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 31
  %shr = ashr exact i32 %shl, 31
  ret i32 %shr
}

define signext i32 @sexti1_i32_2(i1 %a) nounwind {
; RV64I-LABEL: sexti1_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 63
; RV64I-NEXT:    srai a0, a0, 63
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexti1_i32_2:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
; RV64XTHEADBB-NEXT:    ret
  %sext = sext i1 %a to i32
  ret i32 %sext
}

define i64 @sexti1_i64(i64 %a) nounwind {
; RV64I-LABEL: sexti1_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 63
; RV64I-NEXT:    srai a0, a0, 63
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexti1_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 63
  %shr = ashr exact i64 %shl, 63
  ret i64 %shr
}

define i64 @sexti1_i64_2(i1 %a) nounwind {
; RV64I-LABEL: sexti1_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 63
; RV64I-NEXT:    srai a0, a0, 63
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexti1_i64_2:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 0, 0
; RV64XTHEADBB-NEXT:    ret
  %sext = sext i1 %a to i64
  ret i64 %sext
}

define signext i32 @sextb_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sextb_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 56
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sextb_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 24
  %shr = ashr exact i32 %shl, 24
  ret i32 %shr
}

define i64 @sextb_i64(i64 %a) nounwind {
; RV64I-LABEL: sextb_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    srai a0, a0, 56
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sextb_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 7, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 56
  %shr = ashr exact i64 %shl, 56
  ret i64 %shr
}

define signext i32 @sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: sexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 16
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

; Negative test: mismatched shift amounts are not a plain sign-extend, so no
; th.ext may be formed.
define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
; RV64I-LABEL: no_sexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: no_sexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    slli a0, a0, 49
; RV64XTHEADBB-NEXT:    srai a0, a0, 48
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i32 %a, 17
  %shr = ashr exact i32 %shl, 16
  ret i32 %shr
}

define i64 @sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: sexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: sexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.ext a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 48
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

define i64 @no_sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: no_sexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: no_sexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    slli a0, a0, 49
; RV64XTHEADBB-NEXT:    srai a0, a0, 48
; RV64XTHEADBB-NEXT:    ret
  %shl = shl i64 %a, 49
  %shr = ashr exact i64 %shl, 48
  ret i64 %shr
}

; Zero-extension / bitfield-extract tests: XTheadBb selects th.extu for
; and-masks and shifted masks that describe an unsigned bitfield extract.
define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zexth_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zexth_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
; RV64XTHEADBB-NEXT:    ret
  %and = and i64 %a, 65535
  ret i64 %and
}

define i64 @zext_bf_i64(i64 %a) nounwind {
; RV64I-LABEL: zext_bf_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 47
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zext_bf_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 16, 1
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i64 %a, 1
  %and = and i64 %1, 65535
  ret i64 %and
}

define i64 @zext_bf2_i64(i64 %a) nounwind {
; RV64I-LABEL: zext_bf2_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 49
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zext_bf2_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 1
; RV64XTHEADBB-NEXT:    ret
  %t0 = and i64 %a, 65535
  %result = lshr i64 %t0, 1
  ret i64 %result
}

define i64 @zext_i64_srliw(i64 %a) nounwind {
; RV64I-LABEL: zext_i64_srliw:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srliw a0, a0, 16
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: zext_i64_srliw:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    srliw a0, a0, 16
; RV64XTHEADBB-NEXT:    ret
  %1 = lshr i64 %a, 16
  %and = and i64 %1, 65535
  ret i64 %and
}

; Byte-swap tests: XTheadBb lowers bswap to th.revw (32-bit) / th.rev (64-bit).
declare i32 @llvm.bswap.i32(i32)

define signext i32 @bswap_i32(i32 signext %a) nounwind {
; RV64I-LABEL: bswap_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a0, 8
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    srliw a3, a0, 24
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    and a2, a0, a2
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    slli a2, a2, 8
; RV64I-NEXT:    slliw a0, a0, 24
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i32:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.revw a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %1
}

; Similar to bswap_i32 but the result is not sign extended.
define void @bswap_i32_nosext(i32 signext %a, ptr %x) nounwind {
; RV64I-LABEL: bswap_i32_nosext:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a2, a0, 8
; RV64I-NEXT:    lui a3, 16
; RV64I-NEXT:    srliw a4, a0, 24
; RV64I-NEXT:    addi a3, a3, -256
; RV64I-NEXT:    and a2, a2, a3
; RV64I-NEXT:    and a3, a0, a3
; RV64I-NEXT:    or a2, a2, a4
; RV64I-NEXT:    slli a3, a3, 8
; RV64I-NEXT:    slli a0, a0, 24
; RV64I-NEXT:    or a0, a0, a3
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sw a0, 0(a1)
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i32_nosext:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.revw a0, a0
; RV64XTHEADBB-NEXT:    sw a0, 0(a1)
; RV64XTHEADBB-NEXT:    ret
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  store i32 %1, ptr %x
  ret void
}

declare i64 @llvm.bswap.i64(i64)

define i64 @bswap_i64(i64 %a) {
; RV64I-LABEL: bswap_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srli a1, a0, 40
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    srli a3, a0, 56
; RV64I-NEXT:    srli a4, a0, 24
; RV64I-NEXT:    lui a5, 4080
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    or a1, a1, a3
; RV64I-NEXT:    srli a3, a0, 8
; RV64I-NEXT:    and a4, a4, a5
; RV64I-NEXT:    srliw a3, a3, 24
; RV64I-NEXT:    slli a3, a3, 24
; RV64I-NEXT:    or a3, a3, a4
; RV64I-NEXT:    srliw a4, a0, 24
; RV64I-NEXT:    and a5, a0, a5
; RV64I-NEXT:    and a2, a0, a2
; RV64I-NEXT:    slli a0, a0, 56
; RV64I-NEXT:    slli a4, a4, 32
; RV64I-NEXT:    slli a5, a5, 24
; RV64I-NEXT:    or a4, a5, a4
; RV64I-NEXT:    slli a2, a2, 40
; RV64I-NEXT:    or a1, a3, a1
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    or a0, a0, a4
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64XTHEADBB-LABEL: bswap_i64:
; RV64XTHEADBB:       # %bb.0:
; RV64XTHEADBB-NEXT:    th.rev a0, a0
; RV64XTHEADBB-NEXT:    ret
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %1
}