; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV64

; Tests copied from llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
; to exercise shouldFormOverflowOp on RISC-V.

define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo1_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a4, a2, a0
; RV32-NEXT:    sltu a6, a4, a2
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    beq a5, a1, .LBB0_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a5, a1
; RV32-NEXT:    beqz a0, .LBB0_3
; RV32-NEXT:    j .LBB0_4
; RV32-NEXT:  .LBB0_2:
; RV32-NEXT:    sltu a0, a4, a0
; RV32-NEXT:    bnez a0, .LBB0_4
; RV32-NEXT:  .LBB0_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB0_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo1_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a2, a1, a0
; RV64-NEXT:    bltu a2, a0, .LBB0_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB0_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo1_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB1_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB1_2:
; RV32-NEXT:    bnez a1, .LBB1_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB1_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo1_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB1_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB1_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo2_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a1, a3, .LBB2_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB2_2:
; RV32-NEXT:    bnez a0, .LBB2_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB2_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo2_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB2_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB2_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo2_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB3_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB3_2:
; RV32-NEXT:    bnez a1, .LBB3_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB3_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo2_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB3_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB3_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo3_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a3, a1, .LBB4_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB4_2:
; RV32-NEXT:    bnez a0, .LBB4_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB4_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo3_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB4_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB4_2:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo3_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    add a5, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a1, a0, a2
; RV32-NEXT:    add a5, a5, a1
; RV32-NEXT:    beq a5, a3, .LBB5_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:  .LBB5_2:
; RV32-NEXT:    bnez a1, .LBB5_4
; RV32-NEXT:  # %bb.3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB5_4:
; RV32-NEXT:    neg a1, a1
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    sw a0, 0(a4)
; RV32-NEXT:    sw a5, 4(a4)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo3_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB5_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB5_2:
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
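; For reference (illustrative only, not checked by FileCheck), the intrinsic
; form that CodeGenPrepare would otherwise create looks roughly like:
;   %pair = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
;   %val  = extractvalue { i64, i1 } %pair, 0
;   %ov   = extractvalue { i64, i1 } %pair, 1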

define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a4, a4, 1
; RV32-NEXT:    beqz a4, .LBB6_6
; RV32-NEXT:  # %bb.1: # %next
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    beq a3, a1, .LBB6_3
; RV32-NEXT:  # %bb.2: # %next
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB6_3: # %next
; RV32-NEXT:    bnez a0, .LBB6_5
; RV32-NEXT:  # %bb.4: # %next
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB6_5: # %next
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB6_6: # %exit
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo4:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a2, a2, 1
; RV64-NEXT:    beqz a2, .LBB6_4
; RV64-NEXT:  # %bb.1: # %next
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    bltu a0, a1, .LBB6_3
; RV64-NEXT:  # %bb.2: # %next
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB6_3: # %next
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB6_4: # %exit
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
entry:
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo5:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a5, a5, 1
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a6, a2, a0
; RV32-NEXT:    sltu a0, a6, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    sw a6, 0(a4)
; RV32-NEXT:    sw a1, 4(a4)
; RV32-NEXT:    beqz a5, .LBB7_6
; RV32-NEXT:  # %bb.1: # %next
; RV32-NEXT:    beq a3, a1, .LBB7_3
; RV32-NEXT:  # %bb.2: # %next
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB7_3: # %next
; RV32-NEXT:    bnez a0, .LBB7_5
; RV32-NEXT:  # %bb.4: # %next
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB7_5: # %next
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB7_6: # %exit
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo5:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a3, a3, 1
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    sd a0, 0(a2)
; RV64-NEXT:    beqz a3, .LBB7_4
; RV64-NEXT:  # %bb.1: # %next
; RV64-NEXT:    bltu a0, a1, .LBB7_3
; RV64-NEXT:  # %bb.2: # %next
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB7_3: # %next
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB7_4: # %exit
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
entry:
  %add = add i64 %b, %a
  store i64 %add, ptr %ptr
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

; Instcombine folds (a + b <u a) to (a ^ -1 <u b). Make sure we match this
; pattern as well.
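; Illustrative note (not checked by FileCheck): a + b wraps exactly when
; b >u ~a; e.g. for i8 values with a = 200, ~a = 55, and 200 + b wraps exactly
; when b > 55. So the xor form below should map to the same
; uadd.with.overflow pattern as the tests above.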
define i64 @uaddo6_xor(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor:
; RV32:       # %bb.0:
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    beq a1, a3, .LBB8_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    beqz a0, .LBB8_3
; RV32-NEXT:    j .LBB8_4
; RV32-NEXT:  .LBB8_2:
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    bnez a0, .LBB8_4
; RV32-NEXT:  .LBB8_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB8_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor:
; RV64:       # %bb.0:
; RV64-NEXT:    not a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    bltu a2, a1, .LBB8_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 42
; RV64-NEXT:  .LBB8_2:
; RV64-NEXT:    ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_commuted:
; RV32:       # %bb.0:
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    beq a1, a3, .LBB9_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    beqz a0, .LBB9_3
; RV32-NEXT:    j .LBB9_4
; RV32-NEXT:  .LBB9_2:
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    bnez a0, .LBB9_4
; RV32-NEXT:  .LBB9_3:
; RV32-NEXT:    li a2, 42
; RV32-NEXT:  .LBB9_4:
; RV32-NEXT:    neg a1, a0
; RV32-NEXT:    and a1, a1, a3
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_commuted:
; RV64:       # %bb.0:
; RV64-NEXT:    not a2, a0
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    bltu a2, a1, .LBB9_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 42
; RV64-NEXT:  .LBB9_2:
; RV64-NEXT:    ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

declare void @use(i64)

define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_multi_use:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    mv s0, a2
; RV32-NEXT:    not a1, a1
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    beq a1, a3, .LBB10_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a2, a1, a3
; RV32-NEXT:    beqz a2, .LBB10_3
; RV32-NEXT:    j .LBB10_4
; RV32-NEXT:  .LBB10_2:
; RV32-NEXT:    sltu a2, a0, s0
; RV32-NEXT:    bnez a2, .LBB10_4
; RV32-NEXT:  .LBB10_3:
; RV32-NEXT:    li s0, 42
; RV32-NEXT:  .LBB10_4:
; RV32-NEXT:    neg s1, a2
; RV32-NEXT:    and s1, s1, a3
; RV32-NEXT:    call use
; RV32-NEXT:    mv a0, s0
; RV32-NEXT:    mv a1, s1
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_multi_use:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    not a0, a0
; RV64-NEXT:    mv s0, a1
; RV64-NEXT:    bltu a0, a1, .LBB10_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li s0, 42
; RV64-NEXT:  .LBB10_2:
; RV64-NEXT:    call use
; RV64-NEXT:    mv a0, s0
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %x = xor i64 -1, %a
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  call void @use(i64 %x)
  ret i64 %Q
}

; Make sure we do not use the XOR binary operator as insert point, as it may
; come before the second operand of the overflow intrinsic.
define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) {
; RV32-LABEL: uaddo6_xor_op_after_XOR:
; RV32:       # %bb.0:
; RV32-NEXT:    lw a1, 0(a1)
; RV32-NEXT:    not a0, a0
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo6_xor_op_after_XOR:
; RV64:       # %bb.0:
; RV64-NEXT:    lw a1, 0(a1)
; RV64-NEXT:    not a0, a0
; RV64-NEXT:    sext.w a0, a0
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    ret
  %x = xor i32 %a, -1
  %b = load i32, ptr %b.ptr, align 8
  %cmp14 = icmp ugt i32 %b, %x
  %ov = xor i1 %cmp14, true
  ret i1 %ov
}

; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754

define i1 @uaddo_i64_increment(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    seqz a0, a3
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    or a0, a3, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    sw a1, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i64 %x, 1
  %ov = icmp eq i64 %a, 0
  store i64 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
; RV32-LABEL: uaddo_i8_increment_noncanonical_1:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, 1
; RV32-NEXT:    andi a0, a2, 255
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sb a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    andi a0, a2, 255
; RV64-NEXT:    seqz a0, a0
; RV64-NEXT:    sb a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i8 1, %x ; commute
  %ov = icmp eq i8 %a, 0
  store i8 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, ptr %p) {
; RV32-LABEL: uaddo_i32_increment_noncanonical_2:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, 1
; RV32-NEXT:    seqz a0, a2
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i32_increment_noncanonical_2:
; RV64:       # %bb.0:
; RV64-NEXT:    addiw a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sw a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i32 %x, 1
  %ov = icmp eq i32 0, %a ; commute
  store i32 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32-LABEL: uaddo_i16_increment_noncanonical_3:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, 1
; RV32-NEXT:    slli a0, a2, 16
; RV32-NEXT:    srli a0, a0, 16
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sh a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    slli a0, a2, 48
; RV64-NEXT:    srli a0, a0, 48
; RV64-NEXT:    seqz a0, a0
; RV64-NEXT:    sh a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i16 1, %x ; commute
  %ov = icmp eq i16 0, %a ; commute
  store i16 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    seqz a0, a3
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    or a0, a3, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    sw a1, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment_alt:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  %ov = icmp eq i64 %x, -1
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.

define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt_dom:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    seqz a0, a3
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    or a0, a3, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    sw a1, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_increment_alt_dom:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sd a2, 0(a1)
; RV64-NEXT:    ret
  %ov = icmp eq i64 %x, -1
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
; RV32-LABEL: uaddo_i32_decrement_alt:
; RV32:       # %bb.0:
; RV32-NEXT:    snez a2, a0
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    sw a0, 0(a1)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i32_decrement_alt:
; RV64:       # %bb.0:
; RV64-NEXT:    snez a2, a0
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    sw a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %a = add i32 %x, -1
  store i32 %a, ptr %p
  %ov = icmp ne i32 %x, 0
  ret i1 %ov
}

define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt:
; RV32:       # %bb.0:
; RV32-NEXT:    or a3, a0, a1
; RV32-NEXT:    snez a3, a3
; RV32-NEXT:    seqz a4, a0
; RV32-NEXT:    sub a1, a1, a4
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    sw a0, 0(a2)
; RV32-NEXT:    sw a1, 4(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_decrement_alt:
; RV64:       # %bb.0:
; RV64-NEXT:    snez a2, a0
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    sd a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  %ov = icmp ne i64 %x, 0
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.
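; Illustrative note (not verified here): x - 1 wraps exactly when x == 0, so
; the "icmp ne i64 %x, 0" below is the negated unsigned-subtract overflow bit.
; In the *_dom variant the compare is defined before the add, so any intrinsic
; that gets formed must be inserted at a point dominating both users.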

define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt_dom:
; RV32:       # %bb.0:
; RV32-NEXT:    or a3, a0, a1
; RV32-NEXT:    snez a3, a3
; RV32-NEXT:    seqz a4, a0
; RV32-NEXT:    sub a1, a1, a4
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    sw a0, 0(a2)
; RV32-NEXT:    sw a1, 4(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i64_decrement_alt_dom:
; RV64:       # %bb.0:
; RV64-NEXT:    snez a2, a0
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    sd a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %ov = icmp ne i64 %x, 0
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  ret i1 %ov
}

; No transform for illegal types.

define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV32-LABEL: uaddo_i42_increment_illegal_type:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a0, 1
; RV32-NEXT:    seqz a0, a3
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    andi a1, a0, 1023
; RV32-NEXT:    or a0, a3, a1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a3, 0(a2)
; RV32-NEXT:    sh a1, 4(a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a2, a0, 1
; RV64-NEXT:    slli a0, a2, 22
; RV64-NEXT:    srli a3, a0, 22
; RV64-NEXT:    seqz a0, a3
; RV64-NEXT:    sw a2, 0(a1)
; RV64-NEXT:    srli a3, a3, 32
; RV64-NEXT:    sh a3, 4(a1)
; RV64-NEXT:    ret
  %a = add i42 %x, 1
  %ov = icmp eq i42 %a, 0
  store i42 %a, ptr %p
  ret i1 %ov
}

define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    beq a1, a3, .LBB22_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB22_2:
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_i64_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ret
  %s = sub i64 %x, %y
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_math_overflow_used:
; RV32:       # %bb.0:
; RV32-NEXT:    mv a5, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    sub a6, a1, a3
; RV32-NEXT:    sub a6, a6, a0
; RV32-NEXT:    sub a5, a5, a2
; RV32-NEXT:    sw a5, 0(a4)
; RV32-NEXT:    sw a6, 4(a4)
; RV32-NEXT:    beq a1, a3, .LBB23_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB23_2:
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_i64_math_overflow_used:
; RV64:       # %bb.0:
; RV64-NEXT:    sub a3, a0, a1
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    sd a3, 0(a2)
; RV64-NEXT:    ret
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

; Verify insertion point for single-BB. Toggle predicate.

define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
; RV32-LABEL: usubo_ugt_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    sltu a3, a0, a1
; RV32-NEXT:    sub a0, a0, a1
; RV32-NEXT:    sw a0, 0(a2)
; RV32-NEXT:    mv a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a3, a1
; RV64-NEXT:    sext.w a4, a0
; RV64-NEXT:    sltu a3, a4, a3
; RV64-NEXT:    subw a0, a0, a1
; RV64-NEXT:    sw a0, 0(a2)
; RV64-NEXT:    mv a0, a3
; RV64-NEXT:    ret
  %ov = icmp ugt i32 %y, %x
  %s = sub i32 %x, %y
  store i32 %s, ptr %p
  ret i1 %ov
}

; Constant operand should match.
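; Illustrative sketch (not checked by FileCheck): 42 - x wraps exactly when
; x >u 42, so the pattern below corresponds roughly to
;   %pair = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 %x)
;   %ov   = extractvalue { i8, i1 } %pair, 1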

define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op0_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a2, a0, 255
; RV32-NEXT:    li a3, 42
; RV32-NEXT:    sub a3, a3, a0
; RV32-NEXT:    sltiu a0, a2, 43
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    sb a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_constant_op0_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a2, a0, 255
; RV64-NEXT:    li a3, 42
; RV64-NEXT:    subw a3, a3, a0
; RV64-NEXT:    sltiu a0, a2, 43
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    sb a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i8 42, %x
  %ov = icmp ugt i8 %x, 42
  store i8 %s, ptr %p
  ret i1 %ov
}

; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form.

define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op0_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a2, a0, 16
; RV32-NEXT:    srli a2, a2, 16
; RV32-NEXT:    li a3, 43
; RV32-NEXT:    sub a3, a3, a0
; RV32-NEXT:    sltiu a0, a2, 44
; RV32-NEXT:    xori a0, a0, 1
; RV32-NEXT:    sh a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_constant_op0_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a2, a0, 48
; RV64-NEXT:    srli a2, a2, 48
; RV64-NEXT:    li a3, 43
; RV64-NEXT:    subw a3, a3, a0
; RV64-NEXT:    sltiu a0, a2, 44
; RV64-NEXT:    xori a0, a0, 1
; RV64-NEXT:    sh a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i16 43, %x
  %ov = icmp ult i16 43, %x
  store i16 %s, ptr %p
  ret i1 %ov
}

; Subtract with constant operand 1 is canonicalized to add.

define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op1_i16:
; RV32:       # %bb.0:
; RV32-NEXT:    slli a2, a0, 16
; RV32-NEXT:    srli a2, a2, 16
; RV32-NEXT:    addi a3, a0, -44
; RV32-NEXT:    sltiu a0, a2, 44
; RV32-NEXT:    sh a3, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_constant_op1_i16:
; RV64:       # %bb.0:
; RV64-NEXT:    slli a2, a0, 48
; RV64-NEXT:    srli a2, a2, 48
; RV64-NEXT:    addi a3, a0, -44
; RV64-NEXT:    sltiu a0, a2, 44
; RV64-NEXT:    sh a3, 0(a1)
; RV64-NEXT:    ret
  %s = add i16 %x, -44
  %ov = icmp ult i16 %x, 44
  store i16 %s, ptr %p
  ret i1 %ov
}

define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op1_i8:
; RV32:       # %bb.0:
; RV32-NEXT:    andi a2, a0, 255
; RV32-NEXT:    sltiu a2, a2, 45
; RV32-NEXT:    addi a0, a0, -45
; RV32-NEXT:    sb a0, 0(a1)
; RV32-NEXT:    mv a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ugt_constant_op1_i8:
; RV64:       # %bb.0:
; RV64-NEXT:    andi a2, a0, 255
; RV64-NEXT:    sltiu a2, a2, 45
; RV64-NEXT:    addi a0, a0, -45
; RV64-NEXT:    sb a0, 0(a1)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    ret
  %ov = icmp ugt i8 45, %x
  %s = add i8 %x, -45
  store i8 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract 1 changes the compare predicate and constant.
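; Illustrative note (not checked by FileCheck): x - 1 wraps exactly when
; x <u 1, i.e. when x == 0, which is why the test below compares with
; "icmp eq i32 %x, 0" rather than an ult against a constant.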

define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_eq_constant1_op1_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a2, a0, -1
; RV32-NEXT:    seqz a0, a0
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_eq_constant1_op1_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a2, a0
; RV64-NEXT:    addi a3, a0, -1
; RV64-NEXT:    seqz a0, a2
; RV64-NEXT:    sw a3, 0(a1)
; RV64-NEXT:    ret
  %s = add i32 %x, -1
  %ov = icmp eq i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract from 0 (negate) changes the compare predicate.

define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_ne_constant0_op1_i32:
; RV32:       # %bb.0:
; RV32-NEXT:    neg a2, a0
; RV32-NEXT:    snez a0, a0
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ne_constant0_op1_i32:
; RV64:       # %bb.0:
; RV64-NEXT:    sext.w a2, a0
; RV64-NEXT:    negw a3, a0
; RV64-NEXT:    snez a0, a2
; RV64-NEXT:    sw a3, 0(a1)
; RV64-NEXT:    ret
  %s = sub i32 0, %x
  %ov = icmp ne i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; This used to verify insertion point for multi-BB, but now we just bail out.

declare void @call(i1)

define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_sub_dominates_i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    andi a6, a5, 1
; RV32-NEXT:    beqz a6, .LBB31_5
; RV32-NEXT:  # %bb.1: # %t
; RV32-NEXT:    mv a7, a0
; RV32-NEXT:    sltu a0, a0, a2
; RV32-NEXT:    sub t0, a1, a3
; RV32-NEXT:    sub t0, t0, a0
; RV32-NEXT:    sub a2, a7, a2
; RV32-NEXT:    sw a2, 0(a4)
; RV32-NEXT:    sw t0, 4(a4)
; RV32-NEXT:    beqz a6, .LBB31_5
; RV32-NEXT:  # %bb.2: # %end
; RV32-NEXT:    beq a1, a3, .LBB31_4
; RV32-NEXT:  # %bb.3: # %end
; RV32-NEXT:    sltu a0, a1, a3
; RV32-NEXT:  .LBB31_4: # %end
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB31_5: # %f
; RV32-NEXT:    mv a0, a5
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_sub_dominates_i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    andi a4, a3, 1
; RV64-NEXT:    beqz a4, .LBB31_3
; RV64-NEXT:  # %bb.1: # %t
; RV64-NEXT:    sub a5, a0, a1
; RV64-NEXT:    sd a5, 0(a2)
; RV64-NEXT:    beqz a4, .LBB31_3
; RV64-NEXT:  # %bb.2: # %end
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB31_3: # %f
; RV64-NEXT:    mv a0, a3
; RV64-NEXT:    ret
entry:
  br i1 %cond, label %t, label %f

t:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  br i1 %cond, label %end, label %f

f:
  ret i1 %cond

end:
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_cmp_dominates_i64:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    .cfi_def_cfa_offset 32
; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    mv s2, a5
; RV32-NEXT:    andi a5, a5, 1
; RV32-NEXT:    beqz a5, .LBB32_8
; RV32-NEXT:  # %bb.1: # %t
; RV32-NEXT:    mv s0, a4
; RV32-NEXT:    mv s3, a3
; RV32-NEXT:    mv s1, a2
; RV32-NEXT:    mv s5, a1
; RV32-NEXT:    mv s4, a0
; RV32-NEXT:    beq a1, a3, .LBB32_3
; RV32-NEXT:  # %bb.2: # %t
; RV32-NEXT:    sltu s6, s5, s3
; RV32-NEXT:    j .LBB32_4
; RV32-NEXT:  .LBB32_3:
; RV32-NEXT:    sltu s6, s4, s1
; RV32-NEXT:  .LBB32_4: # %t
; RV32-NEXT:    mv a0, s6
; RV32-NEXT:    call call
; RV32-NEXT:    beqz s6, .LBB32_8
; RV32-NEXT:  # %bb.5: # %end
; RV32-NEXT:    sltu a1, s4, s1
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    beq s5, s3, .LBB32_7
; RV32-NEXT:  # %bb.6: # %end
; RV32-NEXT:    sltu a0, s5, s3
; RV32-NEXT:  .LBB32_7: # %end
; RV32-NEXT:    sub a2, s5, s3
; RV32-NEXT:    sub a2, a2, a1
; RV32-NEXT:    sub a1, s4, s1
; RV32-NEXT:    sw a1, 0(s0)
; RV32-NEXT:    sw a2, 4(s0)
; RV32-NEXT:    j .LBB32_9
; RV32-NEXT:  .LBB32_8: # %f
; RV32-NEXT:    mv a0, s2
; RV32-NEXT:  .LBB32_9: # %f
; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: usubo_ult_cmp_dominates_i64:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    addi sp, sp, -48
; RV64-NEXT:    .cfi_def_cfa_offset 48
; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s1, -24
; RV64-NEXT:    .cfi_offset s2, -32
; RV64-NEXT:    .cfi_offset s3, -40
; RV64-NEXT:    .cfi_offset s4, -48
; RV64-NEXT:    mv s0, a3
; RV64-NEXT:    andi a3, a3, 1
; RV64-NEXT:    beqz a3, .LBB32_3
; RV64-NEXT:  # %bb.1: # %t
; RV64-NEXT:    mv s1, a2
; RV64-NEXT:    mv s2, a1
; RV64-NEXT:    mv s3, a0
; RV64-NEXT:    sltu s4, a0, a1
; RV64-NEXT:    mv a0, s4
; RV64-NEXT:    call call
; RV64-NEXT:    bgeu s3, s2, .LBB32_3
; RV64-NEXT:  # %bb.2: # %end
; RV64-NEXT:    sub a0, s3, s2
; RV64-NEXT:    sd a0, 0(s1)
; RV64-NEXT:    mv a0, s4
; RV64-NEXT:    j .LBB32_4
; RV64-NEXT:  .LBB32_3: # %f
; RV64-NEXT:    mv a0, s0
; RV64-NEXT:  .LBB32_4: # %f
; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 48
; RV64-NEXT:    ret
entry:
  br i1 %cond, label %t, label %f

t:
  %ov = icmp ult i64 %x, %y
  call void @call(i1 %ov)
  br i1 %ov, label %end, label %f

f:
  ret i1 %cond

end:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  ret i1 %ov
}

; Verify that crazy/non-canonical code does not crash.

define void @bar() {
; RV32-LABEL: bar:
; RV32:       # %bb.0:
;
; RV64-LABEL: bar:
; RV64:       # %bb.0:
  %cmp = icmp eq i64 1, -1
  %frombool = zext i1 %cmp to i8
  unreachable
}

define void @foo() {
; RV32-LABEL: foo:
; RV32:       # %bb.0:
;
; RV64-LABEL: foo:
; RV64:       # %bb.0:
  %sub = add nsw i64 1, 1
  %conv = trunc i64 %sub to i32
  unreachable
}

; Similarly for usubo.

define i1 @bar2() {
; RV32-LABEL: bar2:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: bar2:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
  %cmp = icmp eq i64 1, 0
  ret i1 %cmp
}

define i64 @foo2(ptr %p) {
; RV32-LABEL: foo2:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    li a1, 0
; RV32-NEXT:    ret
;
; RV64-LABEL: foo2:
; RV64:       # %bb.0:
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
  %sub = add nsw i64 1, -1
  ret i64 %sub
}

; Avoid hoisting a math op into a dominating block which would
; increase the critical path.

define void @PR41129(ptr %p64) {
; RV32-LABEL: PR41129:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lw a2, 4(a0)
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    or a3, a1, a2
; RV32-NEXT:    beqz a3, .LBB37_2
; RV32-NEXT:  # %bb.1: # %false
; RV32-NEXT:    andi a1, a1, 7
; RV32-NEXT:    sw zero, 4(a0)
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB37_2: # %true
; RV32-NEXT:    seqz a3, a1
; RV32-NEXT:    sub a2, a2, a3
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    sw a1, 0(a0)
; RV32-NEXT:    sw a2, 4(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: PR41129:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    ld a1, 0(a0)
; RV64-NEXT:    beqz a1, .LBB37_2
; RV64-NEXT:  # %bb.1: # %false
; RV64-NEXT:    andi a1, a1, 7
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB37_2: # %true
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    sd a1, 0(a0)
; RV64-NEXT:    ret
entry:
  %key = load i64, ptr %p64, align 8
  %cond17 = icmp eq i64 %key, 0
  br i1 %cond17, label %true, label %false

false:
  %andval = and i64 %key, 7
  store i64 %andval, ptr %p64
  br label %exit

true:
  %svalue = add i64 %key, -1
  store i64 %svalue, ptr %p64
  br label %exit

exit:
  ret void
}

define i16 @overflow_not_used(i16 %a, i16 %b, ptr %res) {
; RV32-LABEL: overflow_not_used:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a3, 16
; RV32-NEXT:    addi a3, a3, -1
; RV32-NEXT:    and a4, a1, a3
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    and a3, a0, a3
; RV32-NEXT:    bltu a3, a4, .LBB38_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 42
; RV32-NEXT:  .LBB38_2:
; RV32-NEXT:    sh a0, 0(a2)
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: overflow_not_used:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a3, 16
; RV64-NEXT:    addiw a3, a3, -1
; RV64-NEXT:    and a4, a1, a3
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    and a3, a0, a3
; RV64-NEXT:    bltu a3, a4, .LBB38_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 42
; RV64-NEXT:  .LBB38_2:
; RV64-NEXT:    sh a0, 0(a2)
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:    ret
  %add = add i16 %b, %a
  %cmp = icmp ult i16 %add, %b
  %Q = select i1 %cmp, i16 %b, i16 42
  store i16 %add, ptr %res
  ret i16 %Q
}