; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV64

; Copy tests from llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
; to test shouldFormOverflowOp on RISCV

define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo1_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a4, a2, a0
; RV32-NEXT: sltu a6, a4, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: beq a5, a1, .LBB0_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a5, a1
; RV32-NEXT: beqz a0, .LBB0_3
; RV32-NEXT: j .LBB0_4
; RV32-NEXT: .LBB0_2:
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: bnez a0, .LBB0_4
; RV32-NEXT: .LBB0_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB0_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo1_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a2, a1, a0
; RV64-NEXT: bltu a2, a0, .LBB0_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB0_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo1_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB1_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB1_2:
; RV32-NEXT: bnez a1, .LBB1_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB1_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo1_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB1_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB1_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo2_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a1, a3, .LBB2_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB2_2:
; RV32-NEXT: bnez a0, .LBB2_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB2_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo2_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB2_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB2_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo2_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB3_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB3_2:
; RV32-NEXT: bnez a1, .LBB3_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB3_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo2_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB3_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB3_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo3_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a3, a1, .LBB4_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB4_2:
; RV32-NEXT: bnez a0, .LBB4_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB4_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo3_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB4_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB4_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo3_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB5_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB5_2:
; RV32-NEXT: bnez a1, .LBB5_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB5_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo3_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB5_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB5_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
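; For reference, the transform that is not happening here would replace the add/cmp
; pair with the overflow intrinsic, roughly (a sketch, not checked by this test):
;   %t  = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
;   %ov = extractvalue { i64, i1 } %t, 1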

define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a4, a4, 1
; RV32-NEXT: beqz a4, .LBB6_6
; RV32-NEXT: # %bb.1: # %next
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a3, a1, .LBB6_3
; RV32-NEXT: # %bb.2: # %next
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB6_3: # %next
; RV32-NEXT: bnez a0, .LBB6_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB6_5: # %next
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
; RV32-NEXT: .LBB6_6: # %exit
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo4:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a2, a2, 1
; RV64-NEXT: beqz a2, .LBB6_4
; RV64-NEXT: # %bb.1: # %next
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB6_3
; RV64-NEXT: # %bb.2: # %next
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB6_3: # %next
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB6_4: # %exit
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo5:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a5, a5, 1
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a6, a2, a0
; RV32-NEXT: sltu a0, a6, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: sw a6, 0(a4)
; RV32-NEXT: sw a1, 4(a4)
; RV32-NEXT: beqz a5, .LBB7_6
; RV32-NEXT: # %bb.1: # %next
; RV32-NEXT: beq a3, a1, .LBB7_3
; RV32-NEXT: # %bb.2: # %next
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB7_3: # %next
; RV32-NEXT: bnez a0, .LBB7_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB7_5: # %next
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
; RV32-NEXT: .LBB7_6: # %exit
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo5:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a3, a3, 1
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: beqz a3, .LBB7_4
; RV64-NEXT: # %bb.1: # %next
; RV64-NEXT: bltu a0, a1, .LBB7_3
; RV64-NEXT: # %bb.2: # %next
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB7_3: # %next
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB7_4: # %exit
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
  %add = add i64 %b, %a
  store i64 %add, ptr %ptr
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

; Instcombine folds (a + b <u a) to (a ^ -1 <u b). Make sure we match this
; pattern as well.
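; For example, for the tests below the pre-instcombine form (a sketch, not part of
; the test input) would be:
;   %add = add i64 %a, %b
;   %cmp = icmp ult i64 %add, %a    ; a + b <u a
; and instcombine rewrites it to the form actually used here:
;   %x   = xor i64 %a, -1
;   %cmp = icmp ult i64 %x, %b      ; ~a <u b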
define i64 @uaddo6_xor(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor:
; RV32: # %bb.0:
; RV32-NEXT: not a1, a1
; RV32-NEXT: beq a1, a3, .LBB8_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: beqz a0, .LBB8_3
; RV32-NEXT: j .LBB8_4
; RV32-NEXT: .LBB8_2:
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: bnez a0, .LBB8_4
; RV32-NEXT: .LBB8_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB8_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor:
; RV64: # %bb.0:
; RV64-NEXT: not a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: bltu a2, a1, .LBB8_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB8_2:
; RV64-NEXT: ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_commuted:
; RV32: # %bb.0:
; RV32-NEXT: not a1, a1
; RV32-NEXT: beq a1, a3, .LBB9_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: beqz a0, .LBB9_3
; RV32-NEXT: j .LBB9_4
; RV32-NEXT: .LBB9_2:
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: bnez a0, .LBB9_4
; RV32-NEXT: .LBB9_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB9_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_commuted:
; RV64: # %bb.0:
; RV64-NEXT: not a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: bltu a2, a1, .LBB9_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB9_2:
; RV64-NEXT: ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

declare void @use(i64)

define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_multi_use:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: mv s0, a2
; RV32-NEXT: not a1, a1
; RV32-NEXT: not a0, a0
; RV32-NEXT: beq a1, a3, .LBB10_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a2, a1, a3
; RV32-NEXT: beqz a2, .LBB10_3
; RV32-NEXT: j .LBB10_4
; RV32-NEXT: .LBB10_2:
; RV32-NEXT: sltu a2, a0, s0
; RV32-NEXT: bnez a2, .LBB10_4
; RV32-NEXT: .LBB10_3:
; RV32-NEXT: li s0, 42
; RV32-NEXT: .LBB10_4:
; RV32-NEXT: neg s1, a2
; RV32-NEXT: and s1, s1, a3
; RV32-NEXT: call use@plt
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s1
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_multi_use:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: not a0, a0
; RV64-NEXT: mv s0, a1
; RV64-NEXT: bltu a0, a1, .LBB10_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li s0, 42
; RV64-NEXT: .LBB10_2:
; RV64-NEXT: call use@plt
; RV64-NEXT: mv a0, s0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
  %x = xor i64 -1, %a
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  call void @use(i64 %x)
  ret i64 %Q
}

; Make sure we do not use the XOR binary operator as insert point, as it may
; come before the second operand of the overflow intrinsic.
define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) {
; RV32-LABEL: uaddo6_xor_op_after_XOR:
; RV32: # %bb.0:
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_op_after_XOR:
; RV64: # %bb.0:
; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: not a0, a0
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
  %x = xor i32 %a, -1
  %b = load i32, ptr %b.ptr, align 8
  %cmp14 = icmp ugt i32 %b, %x
  %ov = xor i1 %cmp14, true
  ret i1 %ov
}

; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754

define i1 @uaddo_i64_increment(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment:
; RV32: # %bb.0:
; RV32-NEXT: mv a3, a0
; RV32-NEXT: addi a4, a0, 1
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: add a5, a1, a0
; RV32-NEXT: bgeu a4, a3, .LBB12_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a5, a1
; RV32-NEXT: .LBB12_2:
; RV32-NEXT: sw a4, 0(a2)
; RV32-NEXT: sw a5, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %a = add i64 %x, 1
  %ov = icmp eq i64 %a, 0
  store i64 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
; RV32-LABEL: uaddo_i8_increment_noncanonical_1:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 255
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: andi a0, a2, 255
; RV32-NEXT: xor a0, a0, a2
; RV32-NEXT: snez a0, a0
; RV32-NEXT: sb a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
; RV64: # %bb.0:
; RV64-NEXT: andi a0, a0, 255
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: andi a0, a2, 255
; RV64-NEXT: xor a0, a0, a2
; RV64-NEXT: snez a0, a0
; RV64-NEXT: sb a2, 0(a1)
; RV64-NEXT: ret
  %a = add i8 1, %x ; commute
  %ov = icmp eq i8 %a, 0
  store i8 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, ptr %p) {
; RV32-LABEL: uaddo_i32_increment_noncanonical_2:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: seqz a0, a2
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i32_increment_noncanonical_2:
; RV64: # %bb.0:
; RV64-NEXT: addiw a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: ret
  %a = add i32 %x, 1
  %ov = icmp eq i32 0, %a ; commute
  store i32 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32-LABEL: uaddo_i16_increment_noncanonical_3:
; RV32: # %bb.0:
; RV32-NEXT: lui a2, 16
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: xor a2, a2, a3
; RV32-NEXT: snez a0, a2
; RV32-NEXT: sh a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
; RV64: # %bb.0:
; RV64-NEXT: lui a2, 16
; RV64-NEXT: addiw a2, a2, -1
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: addi a3, a0, 1
; RV64-NEXT: and a2, a3, a2
; RV64-NEXT: xor a2, a2, a3
; RV64-NEXT: snez a0, a2
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
  %a = add i16 1, %x ; commute
  %ov = icmp eq i16 0, %a ; commute
  store i16 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: sltu a4, a3, a0
; RV32-NEXT: add a4, a1, a4
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: addi a0, a0, 1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a4, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment_alt:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  %ov = icmp eq i64 %x, -1
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.

define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt_dom:
; RV32: # %bb.0:
; RV32-NEXT: and a3, a0, a1
; RV32-NEXT: addi a3, a3, 1
; RV32-NEXT: seqz a3, a3
; RV32-NEXT: addi a4, a0, 1
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: sw a4, 0(a2)
; RV32-NEXT: sw a0, 4(a2)
; RV32-NEXT: mv a0, a3
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment_alt_dom:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %ov = icmp eq i64 %x, -1
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, -1
; RV32-NEXT: sltu a4, a3, a0
; RV32-NEXT: add a4, a1, a4
; RV32-NEXT: addi a4, a4, -1
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: snez a0, a0
; RV32-NEXT: sw a4, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_decrement_alt:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, -1
; RV64-NEXT: snez a0, a0
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  %ov = icmp ne i64 %x, 0
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.
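; In the *_dom variant below the compare is emitted before the add, so if an
; intrinsic were formed it would have to be inserted where it dominates both users.
; Sketch of the IR ordering being exercised:
;   %ov = icmp ne i64 %x, 0      ; overflow check against the input
;   %a  = add i64 %x, -1         ; the math, defined after the check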

define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt_dom:
; RV32: # %bb.0:
; RV32-NEXT: or a3, a0, a1
; RV32-NEXT: snez a3, a3
; RV32-NEXT: addi a4, a0, -1
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a4, 0(a2)
; RV32-NEXT: sw a0, 4(a2)
; RV32-NEXT: mv a0, a3
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_decrement_alt_dom:
; RV64: # %bb.0:
; RV64-NEXT: snez a2, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: sd a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %ov = icmp ne i64 %x, 0
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  ret i1 %ov
}

; No transform for illegal types.

define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV32-LABEL: uaddo_i42_increment_illegal_type:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: sltu a0, a3, a0
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: andi a1, a0, 1023
; RV32-NEXT: or a0, a3, a1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sh a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 22
; RV64-NEXT: srli a3, a0, 22
; RV64-NEXT: seqz a0, a3
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: srli a3, a3, 32
; RV64-NEXT: sh a3, 4(a1)
; RV64-NEXT: ret
  %a = add i42 %x, 1
  %ov = icmp eq i42 %a, 0
  store i42 %a, ptr %p
  ret i1 %ov
}

define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: beq a1, a3, .LBB21_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: ret
; RV32-NEXT: .LBB21_2:
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_i64_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: ret
  %s = sub i64 %x, %y
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: mv a5, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: sub a6, a1, a3
; RV32-NEXT: sub a6, a6, a0
; RV32-NEXT: sub a5, a5, a2
; RV32-NEXT: sw a5, 0(a4)
; RV32-NEXT: sw a6, 4(a4)
; RV32-NEXT: beq a1, a3, .LBB22_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB22_2:
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_i64_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: sub a3, a0, a1
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: sd a3, 0(a2)
; RV64-NEXT: ret
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

; Verify insertion point for single-BB. Toggle predicate.
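; "Toggle predicate" here means the check is written as (y >u x) instead of the
; canonical usubo form (x <u y); the two are the same condition. Sketch:
;   %ov = icmp ugt i32 %y, %x    ; equivalent to: icmp ult i32 %x, %y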

define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
; RV32-LABEL: usubo_ugt_i32:
; RV32: # %bb.0:
; RV32-NEXT: sltu a3, a0, a1
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: sw a0, 0(a2)
; RV32-NEXT: mv a0, a3
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a3, a1
; RV64-NEXT: sext.w a4, a0
; RV64-NEXT: sltu a3, a4, a3
; RV64-NEXT: subw a0, a0, a1
; RV64-NEXT: sw a0, 0(a2)
; RV64-NEXT: mv a0, a3
; RV64-NEXT: ret
  %ov = icmp ugt i32 %y, %x
  %s = sub i32 %x, %y
  store i32 %s, ptr %p
  ret i1 %ov
}

; Constant operand should match.

define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op0_i8:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a0, 255
; RV32-NEXT: li a3, 42
; RV32-NEXT: sub a3, a3, a0
; RV32-NEXT: sltiu a0, a2, 43
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: sb a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_constant_op0_i8:
; RV64: # %bb.0:
; RV64-NEXT: andi a2, a0, 255
; RV64-NEXT: li a3, 42
; RV64-NEXT: subw a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 43
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sb a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i8 42, %x
  %ov = icmp ugt i8 %x, 42
  store i8 %s, ptr %p
  ret i1 %ov
}

; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form.

define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op0_i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: srli a2, a2, 16
; RV32-NEXT: li a3, 43
; RV32-NEXT: sub a3, a3, a0
; RV32-NEXT: sltiu a0, a2, 44
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: sh a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_constant_op0_i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 48
; RV64-NEXT: li a3, 43
; RV64-NEXT: subw a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i16 43, %x
  %ov = icmp ult i16 43, %x
  store i16 %s, ptr %p
  ret i1 %ov
}

; Subtract with constant operand 1 is canonicalized to add.
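; For example, 'sub i16 %x, 44' plus its unsigned-overflow check is canonicalized
; to the form used below (sketch):
;   %s  = add i16 %x, -44        ; was: sub i16 %x, 44
;   %ov = icmp ult i16 %x, 44    ; usubo overflow check against the input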

define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op1_i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: srli a2, a2, 16
; RV32-NEXT: addi a3, a0, -44
; RV32-NEXT: sltiu a0, a2, 44
; RV32-NEXT: sh a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_constant_op1_i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 48
; RV64-NEXT: addiw a3, a0, -44
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
  %s = add i16 %x, -44
  %ov = icmp ult i16 %x, 44
  store i16 %s, ptr %p
  ret i1 %ov
}

define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op1_i8:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a0, 255
; RV32-NEXT: sltiu a2, a2, 45
; RV32-NEXT: addi a0, a0, -45
; RV32-NEXT: sb a0, 0(a1)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_constant_op1_i8:
; RV64: # %bb.0:
; RV64-NEXT: andi a2, a0, 255
; RV64-NEXT: sltiu a2, a2, 45
; RV64-NEXT: addiw a0, a0, -45
; RV64-NEXT: sb a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %ov = icmp ugt i8 45, %x
  %s = add i8 %x, -45
  store i8 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract 1 changes the compare predicate and constant.

define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_eq_constant1_op1_i32:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, -1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_eq_constant1_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
; RV64-NEXT: addiw a3, a0, -1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
  %s = add i32 %x, -1
  %ov = icmp eq i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract from 0 (negate) changes the compare predicate.

define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_ne_constant0_op1_i32:
; RV32: # %bb.0:
; RV32-NEXT: neg a2, a0
; RV32-NEXT: snez a0, a0
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ne_constant0_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
; RV64-NEXT: negw a3, a0
; RV64-NEXT: snez a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i32 0, %x
  %ov = icmp ne i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; This used to verify insertion point for multi-BB, but now we just bail out.

declare void @call(i1)

define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_sub_dominates_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a7, a5, 1
; RV32-NEXT: beqz a7, .LBB30_5
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv a6, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: sub t0, a1, a3
; RV32-NEXT: sub t0, t0, a0
; RV32-NEXT: sub a2, a6, a2
; RV32-NEXT: sw a2, 0(a4)
; RV32-NEXT: sw t0, 4(a4)
; RV32-NEXT: beqz a7, .LBB30_5
; RV32-NEXT: # %bb.2: # %end
; RV32-NEXT: beq a1, a3, .LBB30_4
; RV32-NEXT: # %bb.3: # %end
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB30_4: # %end
; RV32-NEXT: ret
; RV32-NEXT: .LBB30_5: # %f
; RV32-NEXT: mv a0, a5
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_sub_dominates_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a4, a3, 1
; RV64-NEXT: beqz a4, .LBB30_3
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: sub a5, a0, a1
; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: beqz a4, .LBB30_3
; RV64-NEXT: # %bb.2: # %end
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB30_3: # %f
; RV64-NEXT: mv a0, a3
; RV64-NEXT: ret
entry:
  br i1 %cond, label %t, label %f

t:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  br i1 %cond, label %end, label %f

f:
  ret i1 %cond

end:
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_cmp_dominates_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: .cfi_offset s2, -16
; RV32-NEXT: .cfi_offset s3, -20
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: mv s4, a5
; RV32-NEXT: andi a5, a5, 1
; RV32-NEXT: beqz a5, .LBB31_8
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
; RV32-NEXT: mv s3, a3
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s5, a1
; RV32-NEXT: mv s2, a0
; RV32-NEXT: beq a1, a3, .LBB31_3
; RV32-NEXT: # %bb.2: # %t
; RV32-NEXT: sltu s6, s5, s3
; RV32-NEXT: j .LBB31_4
; RV32-NEXT: .LBB31_3:
; RV32-NEXT: sltu s6, s2, s1
; RV32-NEXT: .LBB31_4: # %t
; RV32-NEXT: mv a0, s6
; RV32-NEXT: call call@plt
; RV32-NEXT: beqz s6, .LBB31_8
; RV32-NEXT: # %bb.5: # %end
; RV32-NEXT: sltu a1, s2, s1
; RV32-NEXT: mv a0, a1
; RV32-NEXT: beq s5, s3, .LBB31_7
; RV32-NEXT: # %bb.6: # %end
; RV32-NEXT: sltu a0, s5, s3
; RV32-NEXT: .LBB31_7: # %end
; RV32-NEXT: sub a2, s5, s3
; RV32-NEXT: sub a2, a2, a1
; RV32-NEXT: sub a1, s2, s1
; RV32-NEXT: sw a1, 0(s0)
; RV32-NEXT: sw a2, 4(s0)
; RV32-NEXT: j .LBB31_9
; RV32-NEXT: .LBB31_8: # %f
; RV32-NEXT: mv a0, s4
; RV32-NEXT: .LBB31_9: # %f
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_cmp_dominates_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 48
; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: .cfi_offset s1, -24
; RV64-NEXT: .cfi_offset s2, -32
; RV64-NEXT: .cfi_offset s3, -40
; RV64-NEXT: .cfi_offset s4, -48
; RV64-NEXT: mv s0, a3
; RV64-NEXT: andi a3, a3, 1
; RV64-NEXT: beqz a3, .LBB31_3
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: mv s1, a2
; RV64-NEXT: mv s2, a1
; RV64-NEXT: mv s3, a0
; RV64-NEXT: sltu s4, a0, a1
; RV64-NEXT: mv a0, s4
; RV64-NEXT: call call@plt
; RV64-NEXT: bgeu s3, s2, .LBB31_3
; RV64-NEXT: # %bb.2: # %end
; RV64-NEXT: sub a0, s3, s2
; RV64-NEXT: sd a0, 0(s1)
; RV64-NEXT: mv a0, s4
; RV64-NEXT: j .LBB31_4
; RV64-NEXT: .LBB31_3: # %f
; RV64-NEXT: mv a0, s0
; RV64-NEXT: .LBB31_4: # %f
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 48
; RV64-NEXT: ret
entry:
  br i1 %cond, label %t, label %f

t:
  %ov = icmp ult i64 %x, %y
  call void @call(i1 %ov)
  br i1 %ov, label %end, label %f

f:
  ret i1 %cond

end:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  ret i1 %ov
}

; Verify that crazy/non-canonical code does not crash.

define void @bar() {
; RV32-LABEL: bar:
; RV32: # %bb.0:
;
; RV64-LABEL: bar:
; RV64: # %bb.0:
  %cmp = icmp eq i64 1, -1
  %frombool = zext i1 %cmp to i8
  unreachable
}

define void @foo() {
; RV32-LABEL: foo:
; RV32: # %bb.0:
;
; RV64-LABEL: foo:
; RV64: # %bb.0:
  %sub = add nsw i64 1, 1
  %conv = trunc i64 %sub to i32
  unreachable
}

; Similarly for usubo.

define i1 @bar2() {
; RV32-LABEL: bar2:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: bar2:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
  %cmp = icmp eq i64 1, 0
  ret i1 %cmp
}

define i64 @foo2(ptr %p) {
; RV32-LABEL: foo2:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: foo2:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
  %sub = add nsw i64 1, -1
  ret i64 %sub
}

; Avoid hoisting a math op into a dominating block which would
; increase the critical path.

define void @PR41129(ptr %p64) {
; RV32-LABEL: PR41129:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lw a1, 4(a0)
; RV32-NEXT: lw a2, 0(a0)
; RV32-NEXT: or a3, a2, a1
; RV32-NEXT: beqz a3, .LBB36_2
; RV32-NEXT: # %bb.1: # %false
; RV32-NEXT: andi a2, a2, 7
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: sw a2, 0(a0)
; RV32-NEXT: ret
; RV32-NEXT: .LBB36_2: # %true
; RV32-NEXT: addi a3, a2, -1
; RV32-NEXT: sltu a2, a3, a2
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: sw a3, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: PR41129:
; RV64: # %bb.0: # %entry
; RV64-NEXT: ld a1, 0(a0)
; RV64-NEXT: beqz a1, .LBB36_2
; RV64-NEXT: # %bb.1: # %false
; RV64-NEXT: andi a1, a1, 7
; RV64-NEXT: sd a1, 0(a0)
; RV64-NEXT: ret
; RV64-NEXT: .LBB36_2: # %true
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: sd a1, 0(a0)
; RV64-NEXT: ret
entry:
  %key = load i64, ptr %p64, align 8
  %cond17 = icmp eq i64 %key, 0
  br i1 %cond17, label %true, label %false

false:
  %andval = and i64 %key, 7
  store i64 %andval, ptr %p64
  br label %exit

true:
  %svalue = add i64 %key, -1
  store i64 %svalue, ptr %p64
  br label %exit

exit:
  ret void
}