; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefixes=RV64

; Copy tests from llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
; to test shouldFormOverflowOp on RISCV.

define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo1_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a4, a2, a0
; RV32-NEXT: sltu a6, a4, a2
; RV32-NEXT: add a5, a5, a6
; RV32-NEXT: beq a5, a1, .LBB0_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a5, a1
; RV32-NEXT: beqz a0, .LBB0_3
; RV32-NEXT: j .LBB0_4
; RV32-NEXT: .LBB0_2:
; RV32-NEXT: sltu a0, a4, a0
; RV32-NEXT: bnez a0, .LBB0_4
; RV32-NEXT: .LBB0_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB0_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo1_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a2, a1, a0
; RV64-NEXT: bltu a2, a0, .LBB0_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB0_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo1_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB1_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB1_2:
; RV32-NEXT: bnez a1, .LBB1_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB1_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo1_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB1_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB1_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %a
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo2_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a1, a3, .LBB2_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB2_2:
; RV32-NEXT: bnez a0, .LBB2_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB2_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo2_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB2_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB2_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo2_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB3_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB3_2:
; RV32-NEXT: bnez a1, .LBB3_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB3_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo2_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB3_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB3_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ult i64 %add, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; RV32-LABEL: uaddo3_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a3, a1, .LBB4_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB4_2:
; RV32-NEXT: bnez a0, .LBB4_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB4_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo3_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB4_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB4_2:
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo3_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: add a5, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
; RV32-NEXT: beq a5, a3, .LBB5_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB5_2:
; RV32-NEXT: bnez a1, .LBB5_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB5_4:
; RV32-NEXT: neg a1, a1
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: sw a0, 0(a4)
; RV32-NEXT: sw a5, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo3_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB5_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB5_2:
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  %Q = select i1 %cmp, i64 %b, i64 42
  store i64 %add, ptr %res
  ret i64 %Q
}

; TODO? CGP sinks the compare before we have a chance to form the overflow intrinsic.
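;
; For reference only (a sketch, not what the checks above reflect): when CGP
; does form the intrinsic for these add/compare pairs, the IR looks roughly
; like the following, with %pair/%ov being illustrative names:
;   %pair = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %b, i64 %a)
;   %add  = extractvalue { i64, i1 } %pair, 0
;   %ov   = extractvalue { i64, i1 } %pair, 1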

define i64 @uaddo4(i64 %a, i64 %b, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo4:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a4, a4, 1
; RV32-NEXT: beqz a4, .LBB6_6
; RV32-NEXT: # %bb.1: # %next
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: beq a3, a1, .LBB6_3
; RV32-NEXT: # %bb.2: # %next
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB6_3: # %next
; RV32-NEXT: bnez a0, .LBB6_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB6_5: # %next
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
; RV32-NEXT: .LBB6_6: # %exit
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo4:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a2, a2, 1
; RV64-NEXT: beqz a2, .LBB6_4
; RV64-NEXT: # %bb.1: # %next
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: bltu a0, a1, .LBB6_3
; RV64-NEXT: # %bb.2: # %next
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB6_3: # %next
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB6_4: # %exit
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
  %add = add i64 %b, %a
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

define i64 @uaddo5(i64 %a, i64 %b, ptr %ptr, i1 %c) nounwind ssp {
; RV32-LABEL: uaddo5:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a5, a5, 1
; RV32-NEXT: add a1, a3, a1
; RV32-NEXT: add a6, a2, a0
; RV32-NEXT: sltu a0, a6, a2
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: sw a6, 0(a4)
; RV32-NEXT: sw a1, 4(a4)
; RV32-NEXT: beqz a5, .LBB7_6
; RV32-NEXT: # %bb.1: # %next
; RV32-NEXT: beq a3, a1, .LBB7_3
; RV32-NEXT: # %bb.2: # %next
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB7_3: # %next
; RV32-NEXT: bnez a0, .LBB7_5
; RV32-NEXT: # %bb.4: # %next
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB7_5: # %next
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
; RV32-NEXT: .LBB7_6: # %exit
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo5:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a3, a3, 1
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: sd a0, 0(a2)
; RV64-NEXT: beqz a3, .LBB7_4
; RV64-NEXT: # %bb.1: # %next
; RV64-NEXT: bltu a0, a1, .LBB7_3
; RV64-NEXT: # %bb.2: # %next
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB7_3: # %next
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB7_4: # %exit
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
  %add = add i64 %b, %a
  store i64 %add, ptr %ptr
  %cmp = icmp ugt i64 %b, %add
  br i1 %c, label %next, label %exit

next:
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q

exit:
  ret i64 0
}

; Instcombine folds (a + b <u a) to (a ^ -1 <u b). Make sure we match this
; pattern as well.
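;
; A sketch of why that fold is sound for unsigned values: a + b wraps exactly
; when b > (-1 - a), and (-1 - a) has the same bit pattern as (a ^ -1). The
; pre-fold form of the tests below would therefore be roughly:
;   %add = add i64 %a, %b
;   %cmp = icmp ult i64 %add, %a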
define i64 @uaddo6_xor(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor:
; RV32: # %bb.0:
; RV32-NEXT: not a1, a1
; RV32-NEXT: beq a1, a3, .LBB8_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: beqz a0, .LBB8_3
; RV32-NEXT: j .LBB8_4
; RV32-NEXT: .LBB8_2:
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: bnez a0, .LBB8_4
; RV32-NEXT: .LBB8_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB8_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor:
; RV64: # %bb.0:
; RV64-NEXT: not a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: bltu a2, a1, .LBB8_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB8_2:
; RV64-NEXT: ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_commuted:
; RV32: # %bb.0:
; RV32-NEXT: not a1, a1
; RV32-NEXT: beq a1, a3, .LBB9_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: beqz a0, .LBB9_3
; RV32-NEXT: j .LBB9_4
; RV32-NEXT: .LBB9_2:
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: bnez a0, .LBB9_4
; RV32-NEXT: .LBB9_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB9_4:
; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_commuted:
; RV64: # %bb.0:
; RV64-NEXT: not a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: bltu a2, a1, .LBB9_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a0, 42
; RV64-NEXT: .LBB9_2:
; RV64-NEXT: ret
  %x = xor i64 %a, -1
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  ret i64 %Q
}

declare void @use(i64)

define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; RV32-LABEL: uaddo6_xor_multi_use:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: mv s0, a2
; RV32-NEXT: not a1, a1
; RV32-NEXT: not a0, a0
; RV32-NEXT: beq a1, a3, .LBB10_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a2, a1, a3
; RV32-NEXT: beqz a2, .LBB10_3
; RV32-NEXT: j .LBB10_4
; RV32-NEXT: .LBB10_2:
; RV32-NEXT: sltu a2, a0, s0
; RV32-NEXT: bnez a2, .LBB10_4
; RV32-NEXT: .LBB10_3:
; RV32-NEXT: li s0, 42
; RV32-NEXT: .LBB10_4:
; RV32-NEXT: neg s1, a2
; RV32-NEXT: and s1, s1, a3
; RV32-NEXT: call use
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s1
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: .cfi_restore s1
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_multi_use:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: not a0, a0
; RV64-NEXT: mv s0, a1
; RV64-NEXT: bltu a0, a1, .LBB10_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li s0, 42
; RV64-NEXT: .LBB10_2:
; RV64-NEXT: call use
; RV64-NEXT: mv a0, s0
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
  %x = xor i64 -1, %a
  %cmp = icmp ult i64 %x, %b
  %Q = select i1 %cmp, i64 %b, i64 42
  call void @use(i64 %x)
  ret i64 %Q
}

; Make sure we do not use the XOR binary operator as insert point, as it may
; come before the second operand of the overflow intrinsic.
define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) {
; RV32-LABEL: uaddo6_xor_op_after_XOR:
; RV32: # %bb.0:
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: not a0, a0
; RV32-NEXT: sltu a0, a0, a1
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo6_xor_op_after_XOR:
; RV64: # %bb.0:
; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: not a0, a0
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: ret
  %x = xor i32 %a, -1
  %b = load i32, ptr %b.ptr, align 8
  %cmp14 = icmp ugt i32 %b, %x
  %ov = xor i1 %cmp14, true
  ret i1 %ov
}

; When adding 1, the general pattern for add-overflow may be different due to icmp canonicalization.
; PR31754: https://bugs.llvm.org/show_bug.cgi?id=31754

define i1 @uaddo_i64_increment(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: seqz a0, a3
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: or a0, a3, a1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %a = add i64 %x, 1
  %ov = icmp eq i64 %a, 0
  store i64 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i8_increment_noncanonical_1(i8 %x, ptr %p) {
; RV32-LABEL: uaddo_i8_increment_noncanonical_1:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: andi a0, a2, 255
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sb a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i8_increment_noncanonical_1:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: andi a0, a2, 255
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sb a2, 0(a1)
; RV64-NEXT: ret
  %a = add i8 1, %x ; commute
  %ov = icmp eq i8 %a, 0
  store i8 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i32_increment_noncanonical_2(i32 %x, ptr %p) {
; RV32-LABEL: uaddo_i32_increment_noncanonical_2:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: seqz a0, a2
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i32_increment_noncanonical_2:
; RV64: # %bb.0:
; RV64-NEXT: addiw a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: ret
  %a = add i32 %x, 1
  %ov = icmp eq i32 0, %a ; commute
  store i32 %a, ptr %p
  ret i1 %ov
}

define i1 @uaddo_i16_increment_noncanonical_3(i16 %x, ptr %p) {
; RV32-LABEL: uaddo_i16_increment_noncanonical_3:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, 1
; RV32-NEXT: slli a0, a2, 16
; RV32-NEXT: srli a0, a0, 16
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sh a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i16_increment_noncanonical_3:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 48
; RV64-NEXT: srli a0, a0, 48
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sh a2, 0(a1)
; RV64-NEXT: ret
  %a = add i16 1, %x ; commute
  %ov = icmp eq i16 0, %a ; commute
  store i16 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: seqz a0, a3
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: or a0, a3, a1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment_alt:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  %ov = icmp eq i64 %x, -1
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.

define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt_dom:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: seqz a0, a3
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: or a0, a3, a1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_increment_alt_dom:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
  %ov = icmp eq i64 %x, -1
  %a = add i64 %x, 1
  store i64 %a, ptr %p
  ret i1 %ov
}

; The overflow check may be against the input rather than the sum.

define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
; RV32-LABEL: uaddo_i32_decrement_alt:
; RV32: # %bb.0:
; RV32-NEXT: snez a2, a0
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: sw a0, 0(a1)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i32_decrement_alt:
; RV64: # %bb.0:
; RV64-NEXT: snez a2, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: sw a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %a = add i32 %x, -1
  store i32 %a, ptr %p
  %ov = icmp ne i32 %x, 0
  ret i1 %ov
}

define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt:
; RV32: # %bb.0:
; RV32-NEXT: or a3, a0, a1
; RV32-NEXT: seqz a4, a0
; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: snez a0, a3
; RV32-NEXT: sub a1, a1, a4
; RV32-NEXT: sw a5, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_decrement_alt:
; RV64: # %bb.0:
; RV64-NEXT: snez a2, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: sd a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  %ov = icmp ne i64 %x, 0
  ret i1 %ov
}

; Make sure insertion is done correctly based on dominance.
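; In the _dom variant below the compare is written before the add, so any
; overflow intrinsic that gets formed has to be inserted at a point that
; dominates both the compare's users and the store of the math result.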

define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt_dom:
; RV32: # %bb.0:
; RV32-NEXT: or a3, a0, a1
; RV32-NEXT: seqz a4, a0
; RV32-NEXT: addi a5, a0, -1
; RV32-NEXT: snez a0, a3
; RV32-NEXT: sub a1, a1, a4
; RV32-NEXT: sw a5, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_decrement_alt_dom:
; RV64: # %bb.0:
; RV64-NEXT: snez a2, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: sd a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %ov = icmp ne i64 %x, 0
  %a = add i64 %x, -1
  store i64 %a, ptr %p
  ret i1 %ov
}

; No transform for illegal types.

define i1 @uaddo_i42_increment_illegal_type(i42 %x, ptr %p) {
; RV32-LABEL: uaddo_i42_increment_illegal_type:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
; RV32-NEXT: seqz a0, a3
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: andi a1, a0, 1023
; RV32-NEXT: or a0, a3, a1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sh a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i42_increment_illegal_type:
; RV64: # %bb.0:
; RV64-NEXT: addi a2, a0, 1
; RV64-NEXT: slli a0, a2, 22
; RV64-NEXT: srli a3, a0, 22
; RV64-NEXT: seqz a0, a3
; RV64-NEXT: srli a3, a3, 32
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: sh a3, 4(a1)
; RV64-NEXT: ret
  %a = add i42 %x, 1
  %ov = icmp eq i42 %a, 0
  store i42 %a, ptr %p
  ret i1 %ov
}

define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: beq a1, a3, .LBB22_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: ret
; RV32-NEXT: .LBB22_2:
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_i64_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: ret
  %s = sub i64 %x, %y
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; RV32-LABEL: usubo_ult_i64_math_overflow_used:
; RV32: # %bb.0:
; RV32-NEXT: mv a5, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: sub a6, a1, a3
; RV32-NEXT: sub a5, a5, a2
; RV32-NEXT: sub a2, a6, a0
; RV32-NEXT: sw a5, 0(a4)
; RV32-NEXT: sw a2, 4(a4)
; RV32-NEXT: beq a1, a3, .LBB23_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB23_2:
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_i64_math_overflow_used:
; RV64: # %bb.0:
; RV64-NEXT: sub a3, a0, a1
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: sd a3, 0(a2)
; RV64-NEXT: ret
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

; Verify insertion point for single-BB. Toggle predicate.

define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
; RV32-LABEL: usubo_ugt_i32:
; RV32: # %bb.0:
; RV32-NEXT: sltu a3, a0, a1
; RV32-NEXT: sub a0, a0, a1
; RV32-NEXT: sw a0, 0(a2)
; RV32-NEXT: mv a0, a3
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a3, a1
; RV64-NEXT: sext.w a4, a0
; RV64-NEXT: sltu a3, a4, a3
; RV64-NEXT: subw a0, a0, a1
; RV64-NEXT: sw a0, 0(a2)
; RV64-NEXT: mv a0, a3
; RV64-NEXT: ret
  %ov = icmp ugt i32 %y, %x
  %s = sub i32 %x, %y
  store i32 %s, ptr %p
  ret i1 %ov
}

; Constant operand should match.
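; A sketch of the generic intrinsic form for the constant-operand case, for
; reference only (whether it is actually formed depends on shouldFormOverflowOp
; for the type; %pair/%ov are illustrative names):
;   %pair = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 42, i8 %x)
;   %s    = extractvalue { i8, i1 } %pair, 0
;   %ov   = extractvalue { i8, i1 } %pair, 1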

define i1 @usubo_ugt_constant_op0_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op0_i8:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a0, 255
; RV32-NEXT: li a3, 42
; RV32-NEXT: sub a3, a3, a0
; RV32-NEXT: sltiu a0, a2, 43
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: sb a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_constant_op0_i8:
; RV64: # %bb.0:
; RV64-NEXT: andi a2, a0, 255
; RV64-NEXT: li a3, 42
; RV64-NEXT: subw a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 43
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sb a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i8 42, %x
  %ov = icmp ugt i8 %x, 42
  store i8 %s, ptr %p
  ret i1 %ov
}

; Compare with constant operand 0 is canonicalized by commuting, but verify match for non-canonical form.

define i1 @usubo_ult_constant_op0_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op0_i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: li a3, 43
; RV32-NEXT: srli a2, a2, 16
; RV32-NEXT: sub a3, a3, a0
; RV32-NEXT: sltiu a0, a2, 44
; RV32-NEXT: xori a0, a0, 1
; RV32-NEXT: sh a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_constant_op0_i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: li a3, 43
; RV64-NEXT: srli a2, a2, 48
; RV64-NEXT: subw a3, a3, a0
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: xori a0, a0, 1
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i16 43, %x
  %ov = icmp ult i16 43, %x
  store i16 %s, ptr %p
  ret i1 %ov
}

; Subtract with constant operand 1 is canonicalized to add.

define i1 @usubo_ult_constant_op1_i16(i16 %x, ptr %p) {
; RV32-LABEL: usubo_ult_constant_op1_i16:
; RV32: # %bb.0:
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: srli a2, a2, 16
; RV32-NEXT: addi a3, a0, -44
; RV32-NEXT: sltiu a0, a2, 44
; RV32-NEXT: sh a3, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_constant_op1_i16:
; RV64: # %bb.0:
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 48
; RV64-NEXT: addi a3, a0, -44
; RV64-NEXT: sltiu a0, a2, 44
; RV64-NEXT: sh a3, 0(a1)
; RV64-NEXT: ret
  %s = add i16 %x, -44
  %ov = icmp ult i16 %x, 44
  store i16 %s, ptr %p
  ret i1 %ov
}

define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) {
; RV32-LABEL: usubo_ugt_constant_op1_i8:
; RV32: # %bb.0:
; RV32-NEXT: andi a2, a0, 255
; RV32-NEXT: sltiu a2, a2, 45
; RV32-NEXT: addi a0, a0, -45
; RV32-NEXT: sb a0, 0(a1)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ugt_constant_op1_i8:
; RV64: # %bb.0:
; RV64-NEXT: andi a2, a0, 255
; RV64-NEXT: sltiu a2, a2, 45
; RV64-NEXT: addi a0, a0, -45
; RV64-NEXT: sb a0, 0(a1)
; RV64-NEXT: mv a0, a2
; RV64-NEXT: ret
  %ov = icmp ugt i8 45, %x
  %s = add i8 %x, -45
  store i8 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract 1 changes the compare predicate and constant.
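; Rationale: x - 1 underflows (unsigned) exactly when x == 0, so the
; `icmp eq i32 %x, 0` below can stand in for the overflow bit of usub(%x, 1)
; even though the subtract has been canonicalized to `add i32 %x, -1`.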

define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_eq_constant1_op1_i32:
; RV32: # %bb.0:
; RV32-NEXT: addi a2, a0, -1
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_eq_constant1_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
; RV64-NEXT: addi a3, a0, -1
; RV64-NEXT: seqz a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
  %s = add i32 %x, -1
  %ov = icmp eq i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; Special-case: subtract from 0 (negate) changes the compare predicate.

define i1 @usubo_ne_constant0_op1_i32(i32 %x, ptr %p) {
; RV32-LABEL: usubo_ne_constant0_op1_i32:
; RV32: # %bb.0:
; RV32-NEXT: neg a2, a0
; RV32-NEXT: snez a0, a0
; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ne_constant0_op1_i32:
; RV64: # %bb.0:
; RV64-NEXT: sext.w a2, a0
; RV64-NEXT: negw a3, a0
; RV64-NEXT: snez a0, a2
; RV64-NEXT: sw a3, 0(a1)
; RV64-NEXT: ret
  %s = sub i32 0, %x
  %ov = icmp ne i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}

; This used to verify insertion point for multi-BB, but now we just bail out.

declare void @call(i1)

define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_sub_dominates_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a6, a5, 1
; RV32-NEXT: beqz a6, .LBB31_5
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv a7, a0
; RV32-NEXT: sltu a0, a0, a2
; RV32-NEXT: sub t0, a1, a3
; RV32-NEXT: sub a2, a7, a2
; RV32-NEXT: sub a7, t0, a0
; RV32-NEXT: sw a2, 0(a4)
; RV32-NEXT: sw a7, 4(a4)
; RV32-NEXT: beqz a6, .LBB31_5
; RV32-NEXT: # %bb.2: # %end
; RV32-NEXT: beq a1, a3, .LBB31_4
; RV32-NEXT: # %bb.3: # %end
; RV32-NEXT: sltu a0, a1, a3
; RV32-NEXT: .LBB31_4: # %end
; RV32-NEXT: ret
; RV32-NEXT: .LBB31_5: # %f
; RV32-NEXT: mv a0, a5
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_sub_dominates_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a4, a3, 1
; RV64-NEXT: beqz a4, .LBB31_3
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: sub a5, a0, a1
; RV64-NEXT: sd a5, 0(a2)
; RV64-NEXT: beqz a4, .LBB31_3
; RV64-NEXT: # %bb.2: # %end
; RV64-NEXT: sltu a0, a0, a1
; RV64-NEXT: ret
; RV64-NEXT: .LBB31_3: # %f
; RV64-NEXT: mv a0, a3
; RV64-NEXT: ret
entry:
  br i1 %cond, label %t, label %f

t:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  br i1 %cond, label %end, label %f

f:
  ret i1 %cond

end:
  %ov = icmp ult i64 %x, %y
  ret i1 %ov
}

define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) {
; RV32-LABEL: usubo_ult_cmp_dominates_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: .cfi_def_cfa_offset 32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: .cfi_offset s2, -16
; RV32-NEXT: .cfi_offset s3, -20
; RV32-NEXT: .cfi_offset s4, -24
; RV32-NEXT: .cfi_offset s5, -28
; RV32-NEXT: .cfi_offset s6, -32
; RV32-NEXT: mv s5, a5
; RV32-NEXT: mv s3, a1
; RV32-NEXT: andi a1, a5, 1
; RV32-NEXT: beqz a1, .LBB32_8
; RV32-NEXT: # %bb.1: # %t
; RV32-NEXT: mv s0, a4
; RV32-NEXT: mv s2, a3
; RV32-NEXT: mv s1, a2
; RV32-NEXT: mv s4, a0
; RV32-NEXT: beq s3, a3, .LBB32_3
; RV32-NEXT: # %bb.2: # %t
; RV32-NEXT: sltu s6, s3, s2
; RV32-NEXT: j .LBB32_4
; RV32-NEXT: .LBB32_3:
; RV32-NEXT: sltu s6, s4, s1
; RV32-NEXT: .LBB32_4: # %t
; RV32-NEXT: mv a0, s6
; RV32-NEXT: call call
; RV32-NEXT: beqz s6, .LBB32_8
; RV32-NEXT: # %bb.5: # %end
; RV32-NEXT: sltu a1, s4, s1
; RV32-NEXT: mv a0, a1
; RV32-NEXT: beq s3, s2, .LBB32_7
; RV32-NEXT: # %bb.6: # %end
; RV32-NEXT: sltu a0, s3, s2
; RV32-NEXT: .LBB32_7: # %end
; RV32-NEXT: sub a2, s3, s2
; RV32-NEXT: sub a3, s4, s1
; RV32-NEXT: sub a2, a2, a1
; RV32-NEXT: sw a3, 0(s0)
; RV32-NEXT: sw a2, 4(s0)
; RV32-NEXT: j .LBB32_9
; RV32-NEXT: .LBB32_8: # %f
; RV32-NEXT: mv a0, s5
; RV32-NEXT: .LBB32_9: # %f
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: .cfi_restore s1
; RV32-NEXT: .cfi_restore s2
; RV32-NEXT: .cfi_restore s3
; RV32-NEXT: .cfi_restore s4
; RV32-NEXT: .cfi_restore s5
; RV32-NEXT: .cfi_restore s6
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: usubo_ult_cmp_dominates_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -48
; RV64-NEXT: .cfi_def_cfa_offset 48
; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: .cfi_offset s1, -24
; RV64-NEXT: .cfi_offset s2, -32
; RV64-NEXT: .cfi_offset s3, -40
; RV64-NEXT: .cfi_offset s4, -48
; RV64-NEXT: mv s0, a3
; RV64-NEXT: mv s2, a1
; RV64-NEXT: andi a1, a3, 1
; RV64-NEXT: beqz a1, .LBB32_3
; RV64-NEXT: # %bb.1: # %t
; RV64-NEXT: mv s1, a2
; RV64-NEXT: mv s3, a0
; RV64-NEXT: sltu s4, a0, s2
; RV64-NEXT: mv a0, s4
; RV64-NEXT: call call
; RV64-NEXT: bgeu s3, s2, .LBB32_3
; RV64-NEXT: # %bb.2: # %end
; RV64-NEXT: sub a0, s3, s2
; RV64-NEXT: sd a0, 0(s1)
; RV64-NEXT: mv a0, s4
; RV64-NEXT: j .LBB32_4
; RV64-NEXT: .LBB32_3: # %f
; RV64-NEXT: mv a0, s0
; RV64-NEXT: .LBB32_4: # %f
; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: .cfi_restore s1
; RV64-NEXT: .cfi_restore s2
; RV64-NEXT: .cfi_restore s3
; RV64-NEXT: .cfi_restore s4
; RV64-NEXT: addi sp, sp, 48
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
entry:
  br i1 %cond, label %t, label %f

t:
  %ov = icmp ult i64 %x, %y
  call void @call(i1 %ov)
  br i1 %ov, label %end, label %f

f:
  ret i1 %cond

end:
  %s = sub i64 %x, %y
  store i64 %s, ptr %p
  ret i1 %ov
}

; Verify that crazy/non-canonical code does not crash.

define void @bar() {
; RV32-LABEL: bar:
; RV32: # %bb.0:
;
; RV64-LABEL: bar:
; RV64: # %bb.0:
  %cmp = icmp eq i64 1, -1
  %frombool = zext i1 %cmp to i8
  unreachable
}

define void @foo() {
; RV32-LABEL: foo:
; RV32: # %bb.0:
;
; RV64-LABEL: foo:
; RV64: # %bb.0:
  %sub = add nsw i64 1, 1
  %conv = trunc i64 %sub to i32
  unreachable
}

; Similarly for usubo.

define i1 @bar2() {
; RV32-LABEL: bar2:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: bar2:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
  %cmp = icmp eq i64 1, 0
  ret i1 %cmp
}

define i64 @foo2(ptr %p) {
; RV32-LABEL: foo2:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
; RV32-NEXT: ret
;
; RV64-LABEL: foo2:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
  %sub = add nsw i64 1, -1
  ret i64 %sub
}

; Avoid hoisting a math op into a dominating block which would
; increase the critical path.
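; In PR41129 below, the decrement is only needed on the %true path; hoisting it
; into %entry (the dominating block) to pair it with the compare would execute
; it on the %false path as well and lengthen the critical path, so no overflow
; intrinsic should be formed here.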

define void @PR41129(ptr %p64) {
; RV32-LABEL: PR41129:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: or a3, a1, a2
; RV32-NEXT: beqz a3, .LBB37_2
; RV32-NEXT: # %bb.1: # %false
; RV32-NEXT: andi a1, a1, 7
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw zero, 4(a0)
; RV32-NEXT: ret
; RV32-NEXT: .LBB37_2: # %true
; RV32-NEXT: seqz a3, a1
; RV32-NEXT: addi a1, a1, -1
; RV32-NEXT: sub a2, a2, a3
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: ret
;
; RV64-LABEL: PR41129:
; RV64: # %bb.0: # %entry
; RV64-NEXT: ld a1, 0(a0)
; RV64-NEXT: beqz a1, .LBB37_2
; RV64-NEXT: # %bb.1: # %false
; RV64-NEXT: andi a1, a1, 7
; RV64-NEXT: sd a1, 0(a0)
; RV64-NEXT: ret
; RV64-NEXT: .LBB37_2: # %true
; RV64-NEXT: addi a1, a1, -1
; RV64-NEXT: sd a1, 0(a0)
; RV64-NEXT: ret
entry:
  %key = load i64, ptr %p64, align 8
  %cond17 = icmp eq i64 %key, 0
  br i1 %cond17, label %true, label %false

false:
  %andval = and i64 %key, 7
  store i64 %andval, ptr %p64
  br label %exit

true:
  %svalue = add i64 %key, -1
  store i64 %svalue, ptr %p64
  br label %exit

exit:
  ret void
}

define i16 @overflow_not_used(i16 %a, i16 %b, ptr %res) {
; RV32-LABEL: overflow_not_used:
; RV32: # %bb.0:
; RV32-NEXT: lui a3, 16
; RV32-NEXT: add a0, a1, a0
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a4, a1, a3
; RV32-NEXT: and a3, a0, a3
; RV32-NEXT: bltu a3, a4, .LBB38_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 42
; RV32-NEXT: .LBB38_2:
; RV32-NEXT: sh a0, 0(a2)
; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: overflow_not_used:
; RV64: # %bb.0:
; RV64-NEXT: lui a3, 16
; RV64-NEXT: add a0, a1, a0
; RV64-NEXT: addiw a3, a3, -1
; RV64-NEXT: and a4, a1, a3
; RV64-NEXT: and a3, a0, a3
; RV64-NEXT: bltu a3, a4, .LBB38_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB38_2:
; RV64-NEXT: sh a0, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
  %add = add i16 %b, %a
  %cmp = icmp ult i16 %add, %b
  %Q = select i1 %cmp, i16 %b, i16 42
  store i16 %add, ptr %res
  ret i16 %Q
}