; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s

define i32 @fold_urem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    tail __umodsi3
;
; RV32IM-LABEL: fold_urem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 32
; RV64IM-NEXT:    lui a2, 364242
; RV64IM-NEXT:    addi a2, a2, 777
; RV64IM-NEXT:    slli a2, a2, 32
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    subw a2, a0, a1
; RV64IM-NEXT:    srliw a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  ret i32 %1
}


define i32 @fold_urem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_urem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 1060
; RV32I-NEXT:    tail __umodsi3
;
; RV32IM-LABEL: fold_urem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1012964
; RV32IM-NEXT:    addi a1, a1, -61
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    srli a1, a1, 10
; RV32IM-NEXT:    li a2, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    li a1, 1060
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 32
; RV64IM-NEXT:    lui a2, 1012964
; RV64IM-NEXT:    addi a2, a2, -61
; RV64IM-NEXT:    slli a2, a2, 32
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 42
; RV64IM-NEXT:    li a2, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 1060
  ret i32 %1
}


; Don't fold if we can combine urem with udiv.
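; The mulhu sequences in the tests above are the standard unsigned
; magic-number expansion (Granlund-Montgomery): for the odd divisor 95,
; t = mulhu(x, 0x58ED2309) followed by the fixup q = (((x - t) >> 1) + t) >> 6
; yields x udiv 95, and the remainder is recovered as x - 95 * q. In the test
; below, that quotient should be computed once and shared between the urem
; and the udiv rather than expanded twice.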
define i32 @combine_urem_udiv(i32 %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 364242
; RV32IM-NEXT:    addi a1, a1, 777
; RV32IM-NEXT:    mulhu a1, a0, a1
; RV32IM-NEXT:    sub a2, a0, a1
; RV32IM-NEXT:    srli a2, a2, 1
; RV32IM-NEXT:    add a1, a2, a1
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    srli a1, a1, 6
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli s0, a0, 32
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s1, a0
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    slli a1, a0, 32
; RV64IM-NEXT:    lui a2, 364242
; RV64IM-NEXT:    addi a2, a2, 777
; RV64IM-NEXT:    slli a2, a2, 32
; RV64IM-NEXT:    mulhu a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    subw a2, a0, a1
; RV64IM-NEXT:    srliw a2, a2, 1
; RV64IM-NEXT:    add a1, a2, a1
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    srli a1, a1, 6
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    subw a0, a0, a2
; RV64IM-NEXT:    ret
  %1 = urem i32 %x, 95
  %2 = udiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
define i32 @dont_fold_urem_power_of_two(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_power_of_two:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andi a0, a0, 63
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 64
  ret i32 %1
}

; Don't fold if the divisor is one.
define i32 @dont_fold_urem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 0
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^32.
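; 4294967296 does not fit in i32 and wraps to 0, so this is a urem by zero
; (undefined behavior) and the lowering is free to return its input unchanged.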
define i32 @dont_fold_urem_i32_umax(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i32_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %1 = urem i32 %x, 4294967296
  ret i32 %1
}

; Don't fold i64 urem.
define i64 @dont_fold_urem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    li a2, 98
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    li a2, 98
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a1, 98
; RV64I-NEXT:    tail __umoddi3
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lui a1, %hi(.LCPI6_0)
; RV64IM-NEXT:    ld a1, %lo(.LCPI6_0)(a1)
; RV64IM-NEXT:    srli a2, a0, 1
; RV64IM-NEXT:    mulhu a1, a2, a1
; RV64IM-NEXT:    srli a1, a1, 4
; RV64IM-NEXT:    li a2, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = urem i64 %x, 98
  ret i64 %1
}
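; A note on the RV64IM lowering of dont_fold_urem_i64 above: the 64-bit magic
; constant does not fit in any immediate, so it is loaded from the constant
; pool (.LCPI6_0). Because 98 is even, the input is pre-shifted right by one
; (x udiv 98 == (x >> 1) udiv 49), which presumably lets the magic multiply
; for the odd factor avoid an overflow fixup: q = mulhu(x >> 1, M) >> 4, and
; the remainder is then x - 98 * q.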