; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I

; Basic shift support is tested as part of ALU.ll. This file ensures that
; shifts which may not be supported natively are lowered properly.

declare i64 @llvm.fshr.i64(i64, i64, i64)
declare i128 @llvm.fshr.i128(i128, i128, i128)

define i64 @lshr64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: lshr64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi a4, a2, -32
; RV32I-NEXT:    srl a3, a1, a2
; RV32I-NEXT:    bltz a4, .LBB0_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    j .LBB0_3
; RV32I-NEXT:  .LBB0_2:
; RV32I-NEXT:    srl a0, a0, a2
; RV32I-NEXT:    not a2, a2
; RV32I-NEXT:    slli a1, a1, 1
; RV32I-NEXT:    sll a1, a1, a2
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:  .LBB0_3:
; RV32I-NEXT:    srai a1, a4, 31
; RV32I-NEXT:    and a1, a1, a3
; RV32I-NEXT:    ret
;
; RV64I-LABEL: lshr64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srl a0, a0, a1
; RV64I-NEXT:    ret
  %1 = lshr i64 %a, %b
  ret i64 %1
}

define i64 @lshr64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV32I-LABEL: lshr64_minsize:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __lshrdi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: lshr64_minsize:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srl a0, a0, a1
; RV64I-NEXT:    ret
  %1 = lshr i64 %a, %b
  ret i64 %1
}

define i64 @ashr64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: ashr64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    mv a3, a1
; RV32I-NEXT:    addi a4, a2, -32
; RV32I-NEXT:    sra a1, a1, a2
; RV32I-NEXT:    bltz a4, .LBB2_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    srai a3, a3, 31
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    mv a1, a3
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB2_2:
; RV32I-NEXT:    srl a0, a0, a2
; RV32I-NEXT:    not a2, a2
; RV32I-NEXT:    slli a3, a3, 1
; RV32I-NEXT:    sll a2, a3, a2
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ashr64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sra a0, a0, a1
; RV64I-NEXT:    ret
  %1 = ashr i64 %a, %b
  ret i64 %1
}

define i64 @ashr64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV32I-LABEL: ashr64_minsize:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __ashrdi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ashr64_minsize:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sra a0, a0, a1
; RV64I-NEXT:    ret
  %1 = ashr i64 %a, %b
  ret i64 %1
}

define i64 @shl64(i64 %a, i64 %b) nounwind {
; RV32I-LABEL: shl64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi a4, a2, -32
; RV32I-NEXT:    sll a3, a0, a2
; RV32I-NEXT:    bltz a4, .LBB4_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a1, a3
; RV32I-NEXT:    j .LBB4_3
; RV32I-NEXT:  .LBB4_2:
; RV32I-NEXT:    sll a1, a1, a2
; RV32I-NEXT:    not a2, a2
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    srl a0, a0, a2
; RV32I-NEXT:    or a1, a1, a0
; RV32I-NEXT:  .LBB4_3:
; RV32I-NEXT:    srai a0, a4, 31
; RV32I-NEXT:    and a0, a0, a3
; RV32I-NEXT:    ret
;
; RV64I-LABEL: shl64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sll a0, a0, a1
; RV64I-NEXT:    ret
  %1 = shl i64 %a, %b
  ret i64 %1
}

define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV32I-LABEL: shl64_minsize:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __ashldi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: shl64_minsize:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sll a0, a0, a1
; RV64I-NEXT:    ret
  %1 = shl i64 %a, %b
  ret i64 %1
}

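; i128 shifts are always expanded since no base-ISA shift is wider than XLEN.
; As the checks below show, RV32 spills the value to a 32-byte stack slot
; (padded with zero or sign words as appropriate) and reloads four words at an
; offset derived from the shift amount, while RV64 splits the shift across two
; 64-bit registers, mirroring the i64 expansion on RV32 above.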
define i128 @lshr128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: lshr128:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    lw a2, 0(a2)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a4, 4(a1)
; RV32I-NEXT:    lw a5, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    sw zero, 16(sp)
; RV32I-NEXT:    sw zero, 20(sp)
; RV32I-NEXT:    sw zero, 24(sp)
; RV32I-NEXT:    sw zero, 28(sp)
; RV32I-NEXT:    mv a6, sp
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    sw a4, 4(sp)
; RV32I-NEXT:    sw a5, 8(sp)
; RV32I-NEXT:    sw a1, 12(sp)
; RV32I-NEXT:    srli a1, a2, 3
; RV32I-NEXT:    andi a3, a2, 31
; RV32I-NEXT:    andi a1, a1, 12
; RV32I-NEXT:    xori a3, a3, 31
; RV32I-NEXT:    add a1, a6, a1
; RV32I-NEXT:    lw a4, 0(a1)
; RV32I-NEXT:    lw a5, 4(a1)
; RV32I-NEXT:    lw a6, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    srl a4, a4, a2
; RV32I-NEXT:    slli a7, a5, 1
; RV32I-NEXT:    srl a5, a5, a2
; RV32I-NEXT:    slli t0, a6, 1
; RV32I-NEXT:    srl a6, a6, a2
; RV32I-NEXT:    srl a2, a1, a2
; RV32I-NEXT:    slli a1, a1, 1
; RV32I-NEXT:    sll a7, a7, a3
; RV32I-NEXT:    sll t0, t0, a3
; RV32I-NEXT:    sll a1, a1, a3
; RV32I-NEXT:    or a3, a4, a7
; RV32I-NEXT:    or a4, a5, t0
; RV32I-NEXT:    or a1, a6, a1
; RV32I-NEXT:    sw a3, 0(a0)
; RV32I-NEXT:    sw a4, 4(a0)
; RV32I-NEXT:    sw a1, 8(a0)
; RV32I-NEXT:    sw a2, 12(a0)
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV64I-LABEL: lshr128:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi a4, a2, -64
; RV64I-NEXT:    srl a3, a1, a2
; RV64I-NEXT:    bltz a4, .LBB6_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    j .LBB6_3
; RV64I-NEXT:  .LBB6_2:
; RV64I-NEXT:    srl a0, a0, a2
; RV64I-NEXT:    not a2, a2
; RV64I-NEXT:    slli a1, a1, 1
; RV64I-NEXT:    sll a1, a1, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:  .LBB6_3:
; RV64I-NEXT:    srai a1, a4, 63
; RV64I-NEXT:    and a1, a1, a3
; RV64I-NEXT:    ret
  %1 = lshr i128 %a, %b
  ret i128 %1
}

define i128 @ashr128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: ashr128:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    lw a2, 0(a2)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a4, 4(a1)
; RV32I-NEXT:    lw a5, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    mv a6, sp
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    sw a4, 4(sp)
; RV32I-NEXT:    sw a5, 8(sp)
; RV32I-NEXT:    sw a1, 12(sp)
; RV32I-NEXT:    srai a1, a1, 31
; RV32I-NEXT:    srli a3, a2, 3
; RV32I-NEXT:    andi a4, a2, 31
; RV32I-NEXT:    sw a1, 16(sp)
; RV32I-NEXT:    sw a1, 20(sp)
; RV32I-NEXT:    sw a1, 24(sp)
; RV32I-NEXT:    sw a1, 28(sp)
; RV32I-NEXT:    andi a3, a3, 12
; RV32I-NEXT:    xori a1, a4, 31
; RV32I-NEXT:    add a3, a6, a3
; RV32I-NEXT:    lw a4, 0(a3)
; RV32I-NEXT:    lw a5, 4(a3)
; RV32I-NEXT:    lw a6, 8(a3)
; RV32I-NEXT:    lw a3, 12(a3)
; RV32I-NEXT:    srl a4, a4, a2
; RV32I-NEXT:    slli a7, a5, 1
; RV32I-NEXT:    srl a5, a5, a2
; RV32I-NEXT:    slli t0, a6, 1
; RV32I-NEXT:    srl a6, a6, a2
; RV32I-NEXT:    sra a2, a3, a2
; RV32I-NEXT:    slli a3, a3, 1
; RV32I-NEXT:    sll a7, a7, a1
; RV32I-NEXT:    sll t0, t0, a1
; RV32I-NEXT:    sll a1, a3, a1
; RV32I-NEXT:    or a3, a4, a7
; RV32I-NEXT:    or a4, a5, t0
; RV32I-NEXT:    or a1, a6, a1
; RV32I-NEXT:    sw a3, 0(a0)
; RV32I-NEXT:    sw a4, 4(a0)
; RV32I-NEXT:    sw a1, 8(a0)
; RV32I-NEXT:    sw a2, 12(a0)
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV64I-LABEL: ashr128:
; RV64I:       # %bb.0:
; RV64I-NEXT:    mv a3, a1
; RV64I-NEXT:    addi a4, a2, -64
; RV64I-NEXT:    sra a1, a1, a2
; RV64I-NEXT:    bltz a4, .LBB7_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    srai a3, a3, 63
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    mv a1, a3
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB7_2:
; RV64I-NEXT:    srl a0, a0, a2
; RV64I-NEXT:    not a2, a2
; RV64I-NEXT:    slli a3, a3, 1
; RV64I-NEXT:    sll a2, a3, a2
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    ret
  %1 = ashr i128 %a, %b
  ret i128 %1
}

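; shl128 uses the same stack-slot scheme as lshr128, but stores the value in
; the high half of the slot and indexes downwards (sub rather than add), since
; a left shift pulls bits in from the lower, zero-filled words.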
define i128 @shl128(i128 %a, i128 %b) nounwind {
; RV32I-LABEL: shl128:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    lw a2, 0(a2)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a4, 4(a1)
; RV32I-NEXT:    lw a5, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    sw zero, 0(sp)
; RV32I-NEXT:    sw zero, 4(sp)
; RV32I-NEXT:    sw zero, 8(sp)
; RV32I-NEXT:    sw zero, 12(sp)
; RV32I-NEXT:    addi a6, sp, 16
; RV32I-NEXT:    sw a3, 16(sp)
; RV32I-NEXT:    sw a4, 20(sp)
; RV32I-NEXT:    sw a5, 24(sp)
; RV32I-NEXT:    sw a1, 28(sp)
; RV32I-NEXT:    srli a1, a2, 3
; RV32I-NEXT:    andi a3, a2, 31
; RV32I-NEXT:    andi a1, a1, 12
; RV32I-NEXT:    sub a1, a6, a1
; RV32I-NEXT:    lw a4, 0(a1)
; RV32I-NEXT:    lw a5, 4(a1)
; RV32I-NEXT:    lw a6, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    xori a3, a3, 31
; RV32I-NEXT:    sll a7, a5, a2
; RV32I-NEXT:    srli t0, a4, 1
; RV32I-NEXT:    sll a1, a1, a2
; RV32I-NEXT:    sll a4, a4, a2
; RV32I-NEXT:    sll a2, a6, a2
; RV32I-NEXT:    srli a5, a5, 1
; RV32I-NEXT:    srli a6, a6, 1
; RV32I-NEXT:    srl t0, t0, a3
; RV32I-NEXT:    srl a5, a5, a3
; RV32I-NEXT:    srl a3, a6, a3
; RV32I-NEXT:    or a6, a7, t0
; RV32I-NEXT:    or a2, a2, a5
; RV32I-NEXT:    or a1, a1, a3
; RV32I-NEXT:    sw a4, 0(a0)
; RV32I-NEXT:    sw a6, 4(a0)
; RV32I-NEXT:    sw a2, 8(a0)
; RV32I-NEXT:    sw a1, 12(a0)
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV64I-LABEL: shl128:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi a4, a2, -64
; RV64I-NEXT:    sll a3, a0, a2
; RV64I-NEXT:    bltz a4, .LBB8_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a1, a3
; RV64I-NEXT:    j .LBB8_3
; RV64I-NEXT:  .LBB8_2:
; RV64I-NEXT:    sll a1, a1, a2
; RV64I-NEXT:    not a2, a2
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    srl a0, a0, a2
; RV64I-NEXT:    or a1, a1, a0
; RV64I-NEXT:  .LBB8_3:
; RV64I-NEXT:    srai a0, a4, 63
; RV64I-NEXT:    and a0, a0, a3
; RV64I-NEXT:    ret
  %1 = shl i128 %a, %b
  ret i128 %1
}

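; @llvm.fshr with both value operands the same is a rotate right. Even at
; minsize it is expanded inline: a full-width rotate when the type fits in a
; register (srl/negw/sll/or on RV64 for i64), and otherwise a branchy
; expansion that selects register halves based on the high bits of the shift
; amount.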
define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
; RV32I-LABEL: fshr64_minsize:
; RV32I:       # %bb.0:
; RV32I-NEXT:    andi a5, a2, 32
; RV32I-NEXT:    mv a3, a0
; RV32I-NEXT:    beqz a5, .LBB9_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv a3, a1
; RV32I-NEXT:  .LBB9_2:
; RV32I-NEXT:    srl a4, a3, a2
; RV32I-NEXT:    beqz a5, .LBB9_4
; RV32I-NEXT:  # %bb.3:
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:  .LBB9_4:
; RV32I-NEXT:    slli a0, a1, 1
; RV32I-NEXT:    not a5, a2
; RV32I-NEXT:    srl a1, a1, a2
; RV32I-NEXT:    slli a3, a3, 1
; RV32I-NEXT:    sll a0, a0, a5
; RV32I-NEXT:    sll a2, a3, a5
; RV32I-NEXT:    or a0, a0, a4
; RV32I-NEXT:    or a1, a2, a1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fshr64_minsize:
; RV64I:       # %bb.0:
; RV64I-NEXT:    srl a2, a0, a1
; RV64I-NEXT:    negw a1, a1
; RV64I-NEXT:    sll a0, a0, a1
; RV64I-NEXT:    or a0, a2, a0
; RV64I-NEXT:    ret
  %res = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
  ret i64 %res
}

define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
; RV32I-LABEL: fshr128_minsize:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lw a2, 0(a2)
; RV32I-NEXT:    lw t1, 0(a1)
; RV32I-NEXT:    lw a7, 4(a1)
; RV32I-NEXT:    lw a4, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    andi t2, a2, 64
; RV32I-NEXT:    mv t0, a7
; RV32I-NEXT:    mv a3, t1
; RV32I-NEXT:    beqz t2, .LBB10_2
; RV32I-NEXT:  # %bb.1:
; RV32I-NEXT:    mv t0, a1
; RV32I-NEXT:    mv a3, a4
; RV32I-NEXT:  .LBB10_2:
; RV32I-NEXT:    andi a6, a2, 32
; RV32I-NEXT:    mv a5, a3
; RV32I-NEXT:    bnez a6, .LBB10_13
; RV32I-NEXT:  # %bb.3:
; RV32I-NEXT:    bnez t2, .LBB10_14
; RV32I-NEXT:  .LBB10_4:
; RV32I-NEXT:    beqz a6, .LBB10_6
; RV32I-NEXT:  .LBB10_5:
; RV32I-NEXT:    mv t0, a4
; RV32I-NEXT:  .LBB10_6:
; RV32I-NEXT:    slli t3, t0, 1
; RV32I-NEXT:    not t1, a2
; RV32I-NEXT:    beqz t2, .LBB10_8
; RV32I-NEXT:  # %bb.7:
; RV32I-NEXT:    mv a1, a7
; RV32I-NEXT:  .LBB10_8:
; RV32I-NEXT:    srl a7, a5, a2
; RV32I-NEXT:    sll t2, t3, t1
; RV32I-NEXT:    srl t0, t0, a2
; RV32I-NEXT:    beqz a6, .LBB10_10
; RV32I-NEXT:  # %bb.9:
; RV32I-NEXT:    mv a4, a1
; RV32I-NEXT:  .LBB10_10:
; RV32I-NEXT:    or a7, t2, a7
; RV32I-NEXT:    slli t2, a4, 1
; RV32I-NEXT:    sll t2, t2, t1
; RV32I-NEXT:    or t0, t2, t0
; RV32I-NEXT:    srl a4, a4, a2
; RV32I-NEXT:    beqz a6, .LBB10_12
; RV32I-NEXT:  # %bb.11:
; RV32I-NEXT:    mv a1, a3
; RV32I-NEXT:  .LBB10_12:
; RV32I-NEXT:    slli a3, a1, 1
; RV32I-NEXT:    srl a1, a1, a2
; RV32I-NEXT:    slli a5, a5, 1
; RV32I-NEXT:    sll a2, a3, t1
; RV32I-NEXT:    sll a3, a5, t1
; RV32I-NEXT:    or a2, a2, a4
; RV32I-NEXT:    or a1, a3, a1
; RV32I-NEXT:    sw a7, 0(a0)
; RV32I-NEXT:    sw t0, 4(a0)
; RV32I-NEXT:    sw a2, 8(a0)
; RV32I-NEXT:    sw a1, 12(a0)
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB10_13:
; RV32I-NEXT:    mv a5, t0
; RV32I-NEXT:    beqz t2, .LBB10_4
; RV32I-NEXT:  .LBB10_14:
; RV32I-NEXT:    mv a4, t1
; RV32I-NEXT:    bnez a6, .LBB10_5
; RV32I-NEXT:    j .LBB10_6
;
; RV64I-LABEL: fshr128_minsize:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a5, a2, 64
; RV64I-NEXT:    mv a3, a0
; RV64I-NEXT:    beqz a5, .LBB10_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a3, a1
; RV64I-NEXT:  .LBB10_2:
; RV64I-NEXT:    srl a4, a3, a2
; RV64I-NEXT:    beqz a5, .LBB10_4
; RV64I-NEXT:  # %bb.3:
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:  .LBB10_4:
; RV64I-NEXT:    slli a0, a1, 1
; RV64I-NEXT:    not a5, a2
; RV64I-NEXT:    srl a1, a1, a2
; RV64I-NEXT:    slli a3, a3, 1
; RV64I-NEXT:    sll a0, a0, a5
; RV64I-NEXT:    sll a2, a3, a5
; RV64I-NEXT:    or a0, a0, a4
; RV64I-NEXT:    or a1, a2, a1
; RV64I-NEXT:    ret
  %res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b)
  ret i128 %res
}