; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s
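
; With the M extension, srem by a constant should lower to a multiply by a
; magic constant followed by shifts; without it, to a __modsi3/__moddi3
; libcall. For the divisor 95 below, lui+addi materializes ceil(2^38 / 95)
; truncated to 32 bits; since that value is negative as an i32, the dividend
; is added back after the mulh before the arithmetic shift by 6.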

define i32 @fold_srem_positive_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 706409
; RV32IM-NEXT:    addi a1, a1, 389
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    add a1, a1, a0
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 6
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_positive_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_positive_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 706409
; RV64IM-NEXT:    addiw a2, a2, 389
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    add a1, a1, a0
; RV64IM-NEXT:    srliw a2, a1, 31
; RV64IM-NEXT:    sraiw a1, a1, 6
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 95
  ret i32 %1
}


define i32 @fold_srem_positive_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_positive_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, 1060
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_positive_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 253241
; RV32IM-NEXT:    addi a1, a1, -15
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 1060
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_positive_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, 1060
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_positive_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 253241
; RV64IM-NEXT:    addiw a2, a2, -15
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 1060
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 1060
  ret i32 %1
}


define i32 @fold_srem_negative_odd(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_odd:
; RV32I:       # %bb.0:
; RV32I-NEXT:    li a1, -723
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_odd:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 677296
; RV32IM-NEXT:    addi a1, a1, -91
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, -723
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_negative_odd:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    li a1, -723
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_negative_odd:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 677296
; RV64IM-NEXT:    addiw a2, a2, -91
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, -723
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, -723
  ret i32 %1
}


define i32 @fold_srem_negative_even(i32 %x) nounwind {
; RV32I-LABEL: fold_srem_negative_even:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a1, 1048570
; RV32I-NEXT:    addi a1, a1, 1595
; RV32I-NEXT:    tail __modsi3
;
; RV32IM-LABEL: fold_srem_negative_even:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 1036895
; RV32IM-NEXT:    addi a1, a1, 999
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 8
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    lui a2, 1048570
; RV32IM-NEXT:    addi a2, a2, 1595
; RV32IM-NEXT:    mul a1, a1, a2
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_srem_negative_even:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    lui a1, 1048570
; RV64I-NEXT:    addiw a1, a1, 1595
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_srem_negative_even:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 1036895
; RV64IM-NEXT:    addiw a2, a2, 999
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 40
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    lui a2, 1048570
; RV64IM-NEXT:    addi a2, a2, 1595
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, -22981
  ret i32 %1
}


; Don't fold if we can combine srem with sdiv.
define i32 @combine_srem_sdiv(i32 %x) nounwind {
; RV32I-LABEL: combine_srem_sdiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    call __modsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __divsi3
; RV32I-NEXT:    add a0, s1, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_srem_sdiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lui a1, 706409
; RV32IM-NEXT:    addi a1, a1, 389
; RV32IM-NEXT:    mulh a1, a0, a1
; RV32IM-NEXT:    add a1, a1, a0
; RV32IM-NEXT:    srli a2, a1, 31
; RV32IM-NEXT:    srai a1, a1, 6
; RV32IM-NEXT:    add a1, a1, a2
; RV32IM-NEXT:    li a2, 95
; RV32IM-NEXT:    mul a2, a1, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    sub a0, a0, a2
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_srem_sdiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sext.w s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __moddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __divdi3
; RV64I-NEXT:    addw a0, s1, a0
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_srem_sdiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sext.w a1, a0
; RV64IM-NEXT:    lui a2, 706409
; RV64IM-NEXT:    addiw a2, a2, 389
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    srli a1, a1, 32
; RV64IM-NEXT:    add a1, a1, a0
; RV64IM-NEXT:    srliw a2, a1, 31
; RV64IM-NEXT:    sraiw a1, a1, 6
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 95
; RV64IM-NEXT:    mul a2, a1, a2
; RV64IM-NEXT:    add a0, a0, a1
; RV64IM-NEXT:    subw a0, a0, a2
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 95
  %2 = sdiv i32 %x, 95
  %3 = add i32 %1, %2
  ret i32 %3
}

; Don't fold for divisors that are a power of two.
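; Instead, the remainder is computed as x - ((x + bias) & -64), where bias is
; 63 for negative x and 0 otherwise.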
define i32 @dont_fold_srem_power_of_two(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_power_of_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srai a1, a0, 31
; RV32I-NEXT:    srli a1, a1, 26
; RV32I-NEXT:    add a1, a0, a1
; RV32I-NEXT:    andi a1, a1, -64
; RV32I-NEXT:    sub a0, a0, a1
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_power_of_two:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    srai a1, a0, 31
; RV32IM-NEXT:    srli a1, a1, 26
; RV32IM-NEXT:    add a1, a0, a1
; RV32IM-NEXT:    andi a1, a1, -64
; RV32IM-NEXT:    sub a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sraiw a1, a0, 31
; RV64I-NEXT:    srliw a1, a1, 26
; RV64I-NEXT:    add a1, a0, a1
; RV64I-NEXT:    andi a1, a1, -64
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sraiw a1, a0, 31
; RV64IM-NEXT:    srliw a1, a1, 26
; RV64IM-NEXT:    add a1, a0, a1
; RV64IM-NEXT:    andi a1, a1, -64
; RV64IM-NEXT:    subw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 64
  ret i32 %1
}

; Don't fold if the divisor is one.
define i32 @dont_fold_srem_one(i32 %x) nounwind {
; CHECK-LABEL: dont_fold_srem_one:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a0, 0
; CHECK-NEXT:    ret
  %1 = srem i32 %x, 1
  ret i32 %1
}

; Don't fold if the divisor is 2^31.
define i32 @dont_fold_srem_i32_smax(i32 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i32_smax:
; RV32I:       # %bb.0:
; RV32I-NEXT:    srai a1, a0, 31
; RV32I-NEXT:    srli a1, a1, 1
; RV32I-NEXT:    add a1, a0, a1
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_i32_smax:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    srai a1, a0, 31
; RV32IM-NEXT:    srli a1, a1, 1
; RV32IM-NEXT:    add a1, a0, a1
; RV32IM-NEXT:    lui a2, 524288
; RV32IM-NEXT:    and a1, a1, a2
; RV32IM-NEXT:    add a0, a0, a1
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sraiw a1, a0, 31
; RV64I-NEXT:    srliw a1, a1, 1
; RV64I-NEXT:    add a1, a0, a1
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    sraiw a1, a0, 31
; RV64IM-NEXT:    srliw a1, a1, 1
; RV64IM-NEXT:    add a1, a0, a1
; RV64IM-NEXT:    lui a2, 524288
; RV64IM-NEXT:    and a1, a1, a2
; RV64IM-NEXT:    addw a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i32 %x, 2147483648
  ret i32 %1
}

; Don't fold i64 srem.
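; On RV64 with M, the 64-bit magic constant is loaded from the constant pool.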
define i64 @dont_fold_srem_i64(i64 %x) nounwind {
; RV32I-LABEL: dont_fold_srem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    li a2, 98
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __moddi3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_srem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -16
; RV32IM-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    li a2, 98
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __moddi3
; RV32IM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    addi sp, sp, 16
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_srem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a1, 98
; RV64I-NEXT:    tail __moddi3
;
; RV64IM-LABEL: dont_fold_srem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lui a1, %hi(.LCPI8_0)
; RV64IM-NEXT:    ld a1, %lo(.LCPI8_0)(a1)
; RV64IM-NEXT:    mulh a1, a0, a1
; RV64IM-NEXT:    srli a2, a1, 63
; RV64IM-NEXT:    srai a1, a1, 5
; RV64IM-NEXT:    add a1, a1, a2
; RV64IM-NEXT:    li a2, 98
; RV64IM-NEXT:    mul a1, a1, a2
; RV64IM-NEXT:    sub a0, a0, a1
; RV64IM-NEXT:    ret
  %1 = srem i64 %x, 98
  ret i64 %1
}