; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV32IM %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=CHECK,RV64IM %s


; urem by four distinct constant divisors. Without the M extension this
; lowers to one __umodsi3/__umoddi3 libcall per lane; with M each lane is
; instead computed via mulhu with a magic constant followed by mul+sub.
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_1:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu s1, 8(a1)
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 124
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 98
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 1003
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh s4, 0(s3)
; RV32I-NEXT:    sh s0, 2(s3)
; RV32I-NEXT:    sh s1, 4(s3)
; RV32I-NEXT:    sh a0, 6(s3)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_1:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 8456
; RV32IM-NEXT:    lui a6, 11038
; RV32IM-NEXT:    li a7, 95
; RV32IM-NEXT:    lui t0, 10700
; RV32IM-NEXT:    li t1, 98
; RV32IM-NEXT:    addi a6, a6, -1465
; RV32IM-NEXT:    mulhu a6, a2, a6
; RV32IM-NEXT:    mul a6, a6, a7
; RV32IM-NEXT:    lui a7, 1045
; RV32IM-NEXT:    addi t0, t0, -1003
; RV32IM-NEXT:    mulhu t0, a4, t0
; RV32IM-NEXT:    mul t0, t0, t1
; RV32IM-NEXT:    li t1, 1003
; RV32IM-NEXT:    addi a5, a5, 1058
; RV32IM-NEXT:    addi a7, a7, 1801
; RV32IM-NEXT:    mulhu a5, a3, a5
; RV32IM-NEXT:    mulhu a7, a1, a7
; RV32IM-NEXT:    mul a7, a7, t1
; RV32IM-NEXT:    slli t1, a5, 7
; RV32IM-NEXT:    slli a5, a5, 2
; RV32IM-NEXT:    sub a5, a5, t1
; RV32IM-NEXT:    sub a2, a2, a6
; RV32IM-NEXT:    sub a4, a4, t0
; RV32IM-NEXT:    sub a1, a1, a7
; RV32IM-NEXT:    add a3, a3, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu s1, 16(a1)
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 124
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 98
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 1003
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh s4, 0(s3)
; RV64I-NEXT:    sh s0, 2(s3)
; RV64I-NEXT:    sh s1, 4(s3)
; RV64I-NEXT:    sh a0, 6(s3)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_1:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 0(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a4, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, %hi(.LCPI0_0)
; RV64IM-NEXT:    lui a6, %hi(.LCPI0_1)
; RV64IM-NEXT:    li a7, 95
; RV64IM-NEXT:    ld a6, %lo(.LCPI0_1)(a6)
; RV64IM-NEXT:    lui t0, %hi(.LCPI0_2)
; RV64IM-NEXT:    li t1, 98
; RV64IM-NEXT:    ld t0, %lo(.LCPI0_2)(t0)
; RV64IM-NEXT:    mulhu a6, a2, a6
; RV64IM-NEXT:    mul a6, a6, a7
; RV64IM-NEXT:    lui a7, %hi(.LCPI0_3)
; RV64IM-NEXT:    ld a5, %lo(.LCPI0_0)(a5)
; RV64IM-NEXT:    ld a7, %lo(.LCPI0_3)(a7)
; RV64IM-NEXT:    mulhu t0, a4, t0
; RV64IM-NEXT:    mul t0, t0, t1
; RV64IM-NEXT:    li t1, 1003
; RV64IM-NEXT:    mulhu a5, a3, a5
; RV64IM-NEXT:    mulhu a7, a1, a7
; RV64IM-NEXT:    mul a7, a7, t1
; RV64IM-NEXT:    slli t1, a5, 7
; RV64IM-NEXT:    slli a5, a5, 2
; RV64IM-NEXT:    subw a5, a5, t1
; RV64IM-NEXT:    subw a2, a2, a6
; RV64IM-NEXT:    subw a4, a4, t0
; RV64IM-NEXT:    subw a1, a1, a7
; RV64IM-NEXT:    add a3, a3, a5
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a4, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
  ret <4 x i16> %1
}

; Same as above but with a single splat divisor (95), so with M one magic
; constant is materialized once and shared by all four lanes.
define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
; RV32I-LABEL: fold_urem_vec_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 0(a1)
; RV32I-NEXT:    lhu s0, 4(a1)
; RV32I-NEXT:    lhu s1, 8(a1)
; RV32I-NEXT:    lhu s2, 12(a1)
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh s4, 0(s3)
; RV32I-NEXT:    sh s0, 2(s3)
; RV32I-NEXT:    sh s1, 4(s3)
; RV32I-NEXT:    sh a0, 6(s3)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: fold_urem_vec_2:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a7, a2, a5
; RV32IM-NEXT:    mulhu t0, a3, a5
; RV32IM-NEXT:    mulhu t1, a4, a5
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    mul a7, a7, a6
; RV32IM-NEXT:    mul t0, t0, a6
; RV32IM-NEXT:    mul t1, t1, a6
; RV32IM-NEXT:    mul a5, a5, a6
; RV32IM-NEXT:    sub a2, a2, a7
; RV32IM-NEXT:    sub a3, a3, t0
; RV32IM-NEXT:    sub a4, a4, t1
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: fold_urem_vec_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 0(a1)
; RV64I-NEXT:    lhu s0, 8(a1)
; RV64I-NEXT:    lhu s1, 16(a1)
; RV64I-NEXT:    lhu s2, 24(a1)
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh s4, 0(s3)
; RV64I-NEXT:    sh s0, 2(s3)
; RV64I-NEXT:    sh s1, 4(s3)
; RV64I-NEXT:    sh a0, 6(s3)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: fold_urem_vec_2:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lui a2, %hi(.LCPI1_0)
; RV64IM-NEXT:    ld a2, %lo(.LCPI1_0)(a2)
; RV64IM-NEXT:    lhu a3, 0(a1)
; RV64IM-NEXT:    lhu a4, 8(a1)
; RV64IM-NEXT:    lhu a5, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    mulhu a7, a3, a2
; RV64IM-NEXT:    mulhu t0, a4, a2
; RV64IM-NEXT:    mulhu t1, a5, a2
; RV64IM-NEXT:    mulhu a2, a1, a2
; RV64IM-NEXT:    mul a7, a7, a6
; RV64IM-NEXT:    mul t0, t0, a6
; RV64IM-NEXT:    mul t1, t1, a6
; RV64IM-NEXT:    mul a2, a2, a6
; RV64IM-NEXT:    subw a3, a3, a7
; RV64IM-NEXT:    subw a4, a4, t0
; RV64IM-NEXT:    subw a5, a5, t1
; RV64IM-NEXT:    subw a1, a1, a2
; RV64IM-NEXT:    sh a3, 0(a0)
; RV64IM-NEXT:    sh a4, 2(a0)
; RV64IM-NEXT:    sh a5, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  ret <4 x i16> %1
}


; Don't fold if we can combine urem with udiv.
define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
; RV32I-LABEL: combine_urem_udiv:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu s1, 0(a1)
; RV32I-NEXT:    lhu s2, 4(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu s4, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s6, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s8, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s4
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s4, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __udivsi3
; RV32I-NEXT:    add a0, s8, a0
; RV32I-NEXT:    add s2, s7, s2
; RV32I-NEXT:    add s3, s6, s3
; RV32I-NEXT:    add s4, s5, s4
; RV32I-NEXT:    sh a0, 0(s0)
; RV32I-NEXT:    sh s2, 2(s0)
; RV32I-NEXT:    sh s3, 4(s0)
; RV32I-NEXT:    sh s4, 6(s0)
; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: combine_urem_udiv:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a7, a1, a5
; RV32IM-NEXT:    mulhu t0, a4, a5
; RV32IM-NEXT:    mulhu t1, a3, a5
; RV32IM-NEXT:    mulhu a5, a2, a5
; RV32IM-NEXT:    mul t2, a7, a6
; RV32IM-NEXT:    mul t3, t0, a6
; RV32IM-NEXT:    mul t4, t1, a6
; RV32IM-NEXT:    mul a6, a5, a6
; RV32IM-NEXT:    add a2, a2, a5
; RV32IM-NEXT:    add a3, a3, t1
; RV32IM-NEXT:    add a4, a4, t0
; RV32IM-NEXT:    add a1, a1, a7
; RV32IM-NEXT:    sub a2, a2, a6
; RV32IM-NEXT:    sub a3, a3, t4
; RV32IM-NEXT:    sub a4, a4, t3
; RV32IM-NEXT:    sub a1, a1, t2
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: combine_urem_udiv:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -80
; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu s1, 0(a1)
; RV64I-NEXT:    lhu s2, 8(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu s4, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s5, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s6, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s7, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s8, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s4
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s4, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __udivdi3
; RV64I-NEXT:    add a0, s8, a0
; RV64I-NEXT:    add s2, s7, s2
; RV64I-NEXT:    add s3, s6, s3
; RV64I-NEXT:    add s4, s5, s4
; RV64I-NEXT:    sh a0, 0(s0)
; RV64I-NEXT:    sh s2, 2(s0)
; RV64I-NEXT:    sh s3, 4(s0)
; RV64I-NEXT:    sh s4, 6(s0)
; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 80
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: combine_urem_udiv:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 16(a1)
; RV64IM-NEXT:    lhu a3, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI2_0)
; RV64IM-NEXT:    ld a4, %lo(.LCPI2_0)(a4)
; RV64IM-NEXT:    lhu a5, 0(a1)
; RV64IM-NEXT:    lhu a1, 8(a1)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    mulhu a7, a3, a4
; RV64IM-NEXT:    mulhu t0, a2, a4
; RV64IM-NEXT:    mulhu t1, a1, a4
; RV64IM-NEXT:    mulhu a4, a5, a4
; RV64IM-NEXT:    mul t2, a7, a6
; RV64IM-NEXT:    mul t3, t0, a6
; RV64IM-NEXT:    mul t4, t1, a6
; RV64IM-NEXT:    mul a6, a4, a6
; RV64IM-NEXT:    add a4, a5, a4
; RV64IM-NEXT:    add a1, a1, t1
; RV64IM-NEXT:    add a2, a2, t0
; RV64IM-NEXT:    add a3, a3, a7
; RV64IM-NEXT:    subw a4, a4, a6
; RV64IM-NEXT:    subw a1, a1, t4
; RV64IM-NEXT:    subw a2, a2, t3
; RV64IM-NEXT:    subw a3, a3, t2
; RV64IM-NEXT:    sh a4, 0(a0)
; RV64IM-NEXT:    sh a1, 2(a0)
; RV64IM-NEXT:    sh a2, 4(a0)
; RV64IM-NEXT:    sh a3, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %2 = udiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
  %3 = add <4 x i16> %1, %2
  ret <4 x i16> %3
}

; Don't fold for divisors that are a power of two.
define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_power_of_two:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu s1, 0(a1)
; RV32I-NEXT:    lhu s2, 4(a1)
; RV32I-NEXT:    lhu s3, 8(a1)
; RV32I-NEXT:    lhu a2, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a1, 95
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    andi a1, s1, 63
; RV32I-NEXT:    andi a2, s2, 31
; RV32I-NEXT:    andi a3, s3, 7
; RV32I-NEXT:    sh a1, 0(s0)
; RV32I-NEXT:    sh a2, 2(s0)
; RV32I-NEXT:    sh a3, 4(s0)
; RV32I-NEXT:    sh a0, 6(s0)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_power_of_two:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 0(a1)
; RV32IM-NEXT:    lhu a3, 4(a1)
; RV32IM-NEXT:    lhu a4, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a5, 11038
; RV32IM-NEXT:    li a6, 95
; RV32IM-NEXT:    addi a5, a5, -1465
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    andi a2, a2, 63
; RV32IM-NEXT:    andi a3, a3, 31
; RV32IM-NEXT:    andi a4, a4, 7
; RV32IM-NEXT:    mul a5, a5, a6
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh a2, 0(a0)
; RV32IM-NEXT:    sh a3, 2(a0)
; RV32IM-NEXT:    sh a4, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_power_of_two:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu s1, 0(a1)
; RV64I-NEXT:    lhu s2, 8(a1)
; RV64I-NEXT:    lhu s3, 16(a1)
; RV64I-NEXT:    lhu a2, 24(a1)
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    li a1, 95
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    andi a1, s1, 63
; RV64I-NEXT:    andi a2, s2, 31
; RV64I-NEXT:    andi a3, s3, 7
; RV64I-NEXT:    sh a1, 0(s0)
; RV64I-NEXT:    sh a2, 2(s0)
; RV64I-NEXT:    sh a3, 4(s0)
; RV64I-NEXT:    sh a0, 6(s0)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_power_of_two:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 0(a1)
; RV64IM-NEXT:    lhu a3, 8(a1)
; RV64IM-NEXT:    lhu a4, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a5, %hi(.LCPI3_0)
; RV64IM-NEXT:    li a6, 95
; RV64IM-NEXT:    ld a5, %lo(.LCPI3_0)(a5)
; RV64IM-NEXT:    andi a2, a2, 63
; RV64IM-NEXT:    andi a3, a3, 31
; RV64IM-NEXT:    andi a4, a4, 7
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    mul a5, a5, a6
; RV64IM-NEXT:    subw a1, a1, a5
; RV64IM-NEXT:    sh a2, 0(a0)
; RV64IM-NEXT:    sh a3, 2(a0)
; RV64IM-NEXT:    sh a4, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
  ret <4 x i16> %1
}

; Don't fold if the divisor is one.
define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_one:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lhu a2, 4(a1)
; RV32I-NEXT:    lhu s0, 8(a1)
; RV32I-NEXT:    lhu s1, 12(a1)
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    li a1, 654
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    li a1, 23
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a1, a0, 1327
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __umodsi3
; RV32I-NEXT:    sh zero, 0(s2)
; RV32I-NEXT:    sh s3, 2(s2)
; RV32I-NEXT:    sh s0, 4(s2)
; RV32I-NEXT:    sh a0, 6(s2)
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_one:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    lhu a2, 4(a1)
; RV32IM-NEXT:    lhu a3, 8(a1)
; RV32IM-NEXT:    lhu a1, 12(a1)
; RV32IM-NEXT:    lui a4, 1603
; RV32IM-NEXT:    li a5, 654
; RV32IM-NEXT:    lui a6, 45590
; RV32IM-NEXT:    li a7, 23
; RV32IM-NEXT:    addi a4, a4, 1341
; RV32IM-NEXT:    mulhu a4, a2, a4
; RV32IM-NEXT:    mul a4, a4, a5
; RV32IM-NEXT:    lui a5, 193
; RV32IM-NEXT:    addi a6, a6, 1069
; RV32IM-NEXT:    mulhu a6, a3, a6
; RV32IM-NEXT:    mul a6, a6, a7
; RV32IM-NEXT:    lui a7, 1
; RV32IM-NEXT:    addi a5, a5, 1464
; RV32IM-NEXT:    addi a7, a7, 1327
; RV32IM-NEXT:    mulhu a5, a1, a5
; RV32IM-NEXT:    mul a5, a5, a7
; RV32IM-NEXT:    sub a2, a2, a4
; RV32IM-NEXT:    sub a3, a3, a6
; RV32IM-NEXT:    sub a1, a1, a5
; RV32IM-NEXT:    sh zero, 0(a0)
; RV32IM-NEXT:    sh a2, 2(a0)
; RV32IM-NEXT:    sh a3, 4(a0)
; RV32IM-NEXT:    sh a1, 6(a0)
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_one:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    lhu a2, 8(a1)
; RV64I-NEXT:    lhu s0, 16(a1)
; RV64I-NEXT:    lhu s1, 24(a1)
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 23
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sh zero, 0(s2)
; RV64I-NEXT:    sh s3, 2(s2)
; RV64I-NEXT:    sh s0, 4(s2)
; RV64I-NEXT:    sh a0, 6(s2)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_one:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    lhu a2, 8(a1)
; RV64IM-NEXT:    lhu a3, 16(a1)
; RV64IM-NEXT:    lhu a1, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI4_0)
; RV64IM-NEXT:    li a5, 654
; RV64IM-NEXT:    ld a4, %lo(.LCPI4_0)(a4)
; RV64IM-NEXT:    lui a6, %hi(.LCPI4_1)
; RV64IM-NEXT:    li a7, 23
; RV64IM-NEXT:    ld a6, %lo(.LCPI4_1)(a6)
; RV64IM-NEXT:    mulhu a4, a2, a4
; RV64IM-NEXT:    mul a4, a4, a5
; RV64IM-NEXT:    lui a5, %hi(.LCPI4_2)
; RV64IM-NEXT:    ld a5, %lo(.LCPI4_2)(a5)
; RV64IM-NEXT:    mulhu a6, a3, a6
; RV64IM-NEXT:    mul a6, a6, a7
; RV64IM-NEXT:    lui a7, 1
; RV64IM-NEXT:    addi a7, a7, 1327
; RV64IM-NEXT:    mulhu a5, a1, a5
; RV64IM-NEXT:    mul a5, a5, a7
; RV64IM-NEXT:    subw a2, a2, a4
; RV64IM-NEXT:    subw a3, a3, a6
; RV64IM-NEXT:    subw a1, a1, a5
; RV64IM-NEXT:    sh zero, 0(a0)
; RV64IM-NEXT:    sh a2, 2(a0)
; RV64IM-NEXT:    sh a3, 4(a0)
; RV64IM-NEXT:    sh a1, 6(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold if the divisor is 2^16.
; NOTE(review): the function name says "i16_smax" but the distinguishing
; lane divisor below is 65536 (2^16); the name looks inherited from the
; srem variant of this test — confirm before relying on it.
define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
; CHECK-LABEL: dont_fold_urem_i16_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  %1 = urem <4 x i16> %x, <i16 1, i16 65536, i16 23, i16 5423>
  ret <4 x i16> %1
}

; Don't fold i64 urem.
define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
; RV32I-LABEL: dont_fold_urem_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -48
; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    lw s1, 16(a1)
; RV32I-NEXT:    lw s2, 20(a1)
; RV32I-NEXT:    lw s3, 24(a1)
; RV32I-NEXT:    lw s4, 28(a1)
; RV32I-NEXT:    lw a3, 0(a1)
; RV32I-NEXT:    lw a4, 4(a1)
; RV32I-NEXT:    lw s5, 8(a1)
; RV32I-NEXT:    lw s6, 12(a1)
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    li a2, 1
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    mv a1, a4
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s7, a0
; RV32I-NEXT:    mv s8, a1
; RV32I-NEXT:    li a2, 654
; RV32I-NEXT:    mv a0, s5
; RV32I-NEXT:    mv a1, s6
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s5, a0
; RV32I-NEXT:    mv s6, a1
; RV32I-NEXT:    li a2, 23
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    addi a2, a0, 1327
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    mv a1, s4
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    call __umoddi3
; RV32I-NEXT:    sw s1, 16(s0)
; RV32I-NEXT:    sw s2, 20(s0)
; RV32I-NEXT:    sw a0, 24(s0)
; RV32I-NEXT:    sw a1, 28(s0)
; RV32I-NEXT:    sw s7, 0(s0)
; RV32I-NEXT:    sw s8, 4(s0)
; RV32I-NEXT:    sw s5, 8(s0)
; RV32I-NEXT:    sw s6, 12(s0)
; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 48
; RV32I-NEXT:    ret
;
; RV32IM-LABEL: dont_fold_urem_i64:
; RV32IM:       # %bb.0:
; RV32IM-NEXT:    addi sp, sp, -48
; RV32IM-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s0, 40(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s1, 36(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s2, 32(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s3, 28(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s4, 24(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
; RV32IM-NEXT:    lw s1, 16(a1)
; RV32IM-NEXT:    lw s2, 20(a1)
; RV32IM-NEXT:    lw s3, 24(a1)
; RV32IM-NEXT:    lw s4, 28(a1)
; RV32IM-NEXT:    lw a3, 0(a1)
; RV32IM-NEXT:    lw a4, 4(a1)
; RV32IM-NEXT:    lw s5, 8(a1)
; RV32IM-NEXT:    lw s6, 12(a1)
; RV32IM-NEXT:    mv s0, a0
; RV32IM-NEXT:    li a2, 1
; RV32IM-NEXT:    mv a0, a3
; RV32IM-NEXT:    mv a1, a4
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s7, a0
; RV32IM-NEXT:    mv s8, a1
; RV32IM-NEXT:    li a2, 654
; RV32IM-NEXT:    mv a0, s5
; RV32IM-NEXT:    mv a1, s6
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s5, a0
; RV32IM-NEXT:    mv s6, a1
; RV32IM-NEXT:    li a2, 23
; RV32IM-NEXT:    mv a0, s1
; RV32IM-NEXT:    mv a1, s2
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    mv s1, a0
; RV32IM-NEXT:    mv s2, a1
; RV32IM-NEXT:    lui a0, 1
; RV32IM-NEXT:    addi a2, a0, 1327
; RV32IM-NEXT:    mv a0, s3
; RV32IM-NEXT:    mv a1, s4
; RV32IM-NEXT:    li a3, 0
; RV32IM-NEXT:    call __umoddi3
; RV32IM-NEXT:    sw s1, 16(s0)
; RV32IM-NEXT:    sw s2, 20(s0)
; RV32IM-NEXT:    sw a0, 24(s0)
; RV32IM-NEXT:    sw a1, 28(s0)
; RV32IM-NEXT:    sw s7, 0(s0)
; RV32IM-NEXT:    sw s8, 4(s0)
; RV32IM-NEXT:    sw s5, 8(s0)
; RV32IM-NEXT:    sw s6, 12(s0)
; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
; RV32IM-NEXT:    addi sp, sp, 48
; RV32IM-NEXT:    ret
;
; RV64I-LABEL: dont_fold_urem_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    ld a2, 8(a1)
; RV64I-NEXT:    ld s0, 16(a1)
; RV64I-NEXT:    ld s1, 24(a1)
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    li a1, 654
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    li a1, 23
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    addiw a1, a0, 1327
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __umoddi3
; RV64I-NEXT:    sd zero, 0(s2)
; RV64I-NEXT:    sd s3, 8(s2)
; RV64I-NEXT:    sd s0, 16(s2)
; RV64I-NEXT:    sd a0, 24(s2)
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
;
; RV64IM-LABEL: dont_fold_urem_i64:
; RV64IM:       # %bb.0:
; RV64IM-NEXT:    ld a2, 8(a1)
; RV64IM-NEXT:    ld a3, 16(a1)
; RV64IM-NEXT:    ld a1, 24(a1)
; RV64IM-NEXT:    lui a4, %hi(.LCPI6_1)
; RV64IM-NEXT:    ld a4, %lo(.LCPI6_1)(a4)
; RV64IM-NEXT:    lui a5, %hi(.LCPI6_0)
; RV64IM-NEXT:    li a6, 654
; RV64IM-NEXT:    srli a7, a2, 1
; RV64IM-NEXT:    mulhu a4, a7, a4
; RV64IM-NEXT:    lui a7, %hi(.LCPI6_2)
; RV64IM-NEXT:    ld a5, %lo(.LCPI6_0)(a5)
; RV64IM-NEXT:    ld a7, %lo(.LCPI6_2)(a7)
; RV64IM-NEXT:    srli a4, a4, 7
; RV64IM-NEXT:    mul a4, a4, a6
; RV64IM-NEXT:    lui a6, 1
; RV64IM-NEXT:    addiw a6, a6, 1327
; RV64IM-NEXT:    mulhu a5, a3, a5
; RV64IM-NEXT:    mulhu a7, a1, a7
; RV64IM-NEXT:    srli a7, a7, 12
; RV64IM-NEXT:    mul a6, a7, a6
; RV64IM-NEXT:    sub a7, a3, a5
; RV64IM-NEXT:    srli a7, a7, 1
; RV64IM-NEXT:    add a5, a7, a5
; RV64IM-NEXT:    sub a2, a2, a4
; RV64IM-NEXT:    sub a1, a1, a6
; RV64IM-NEXT:    li a4, 23
; RV64IM-NEXT:    srli a5, a5, 4
; RV64IM-NEXT:    mul a4, a5, a4
; RV64IM-NEXT:    sub a3, a3, a4
; RV64IM-NEXT:    sd zero, 0(a0)
; RV64IM-NEXT:    sd a2, 8(a0)
; RV64IM-NEXT:    sd a3, 16(a0)
; RV64IM-NEXT:    sd a1, 24(a0)
; RV64IM-NEXT:    ret
  %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
  ret <4 x i64> %1
}