; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32IA %s
; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32IA %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64I %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64IA %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64IA %s

define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i8:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lbu a3, 0(a0)
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    andi s2, a1, 255
; RV32I-NEXT:  .LBB0_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    andi a0, a3, 255
; RV32I-NEXT:    sltu a0, a0, s2
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a0, a0, s1
; RV32I-NEXT:    sub a2, a3, a0
; RV32I-NEXT:    sb a3, 15(sp)
; RV32I-NEXT:    addi a1, sp, 15
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_1
; RV32I-NEXT:    lbu a3, 15(sp)
; RV32I-NEXT:    beqz a0, .LBB0_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i8:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a3, a0, 3
; RV32IA-NEXT:    li a4, 255
; RV32IA-NEXT:    andi a0, a3, 24
; RV32IA-NEXT:    lw a5, 0(a2)
; RV32IA-NEXT:    sll a3, a4, a3
; RV32IA-NEXT:    not a3, a3
; RV32IA-NEXT:    andi a4, a1, 255
; RV32IA-NEXT:  .LBB0_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB0_3 Depth 2
; RV32IA-NEXT:    mv a6, a5
; RV32IA-NEXT:    srl a5, a5, a0
; RV32IA-NEXT:    andi a7, a5, 255
; RV32IA-NEXT:    sltu a7, a7, a4
; RV32IA-NEXT:    addi a7, a7, -1
; RV32IA-NEXT:    and a7, a7, a1
; RV32IA-NEXT:    sub a5, a5, a7
; RV32IA-NEXT:    andi a5, a5, 255
; RV32IA-NEXT:    sll a5, a5, a0
; RV32IA-NEXT:    and a7, a6, a3
; RV32IA-NEXT:    or a7, a7, a5
; RV32IA-NEXT:  .LBB0_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB0_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
; RV32IA-NEXT:    bne a5, a6, .LBB0_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
; RV32IA-NEXT:    bnez t0, .LBB0_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a5, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lbu a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    andi s2, a1, 255
; RV64I-NEXT:  .LBB0_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    andi a0, a3, 255
; RV64I-NEXT:    sltu a0, a0, s2
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a0, a0, s1
; RV64I-NEXT:    sub a2, a3, a0
; RV64I-NEXT:    sb a3, 15(sp)
; RV64I-NEXT:    addi a1, sp, 15
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_1
; RV64I-NEXT:    lbu a3, 15(sp)
; RV64I-NEXT:    beqz a0, .LBB0_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i8:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a4, a0, 3
; RV64IA-NEXT:    li a5, 255
; RV64IA-NEXT:    andi a0, a4, 24
; RV64IA-NEXT:    lw a3, 0(a2)
; RV64IA-NEXT:    sllw a4, a5, a4
; RV64IA-NEXT:    not a4, a4
; RV64IA-NEXT:    andi a5, a1, 255
; RV64IA-NEXT:  .LBB0_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB0_3 Depth 2
; RV64IA-NEXT:    srlw a6, a3, a0
; RV64IA-NEXT:    sext.w a7, a3
; RV64IA-NEXT:    andi t0, a6, 255
; RV64IA-NEXT:    sltu t0, t0, a5
; RV64IA-NEXT:    addi t0, t0, -1
; RV64IA-NEXT:    and t0, t0, a1
; RV64IA-NEXT:    subw a6, a6, t0
; RV64IA-NEXT:    andi a6, a6, 255
; RV64IA-NEXT:    sllw a6, a6, a0
; RV64IA-NEXT:    and a3, a3, a4
; RV64IA-NEXT:    or a6, a3, a6
; RV64IA-NEXT:  .LBB0_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB0_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
; RV64IA-NEXT:    bne a3, a7, .LBB0_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB0_3 Depth=2
; RV64IA-NEXT:    sc.w.rl t0, a6, (a2)
; RV64IA-NEXT:    bnez t0, .LBB0_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a3, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
  ret i8 %result
}

define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    .cfi_offset s3, -20
; RV32I-NEXT:    mv s0, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lhu a1, 0(a0)
; RV32I-NEXT:    lui s2, 16
; RV32I-NEXT:    addi s2, s2, -1
; RV32I-NEXT:    and s3, s0, s2
; RV32I-NEXT:  .LBB1_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    and a0, a1, s2
; RV32I-NEXT:    sltu a0, a0, s3
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a0, a0, s0
; RV32I-NEXT:    sub a2, a1, a0
; RV32I-NEXT:    sh a1, 10(sp)
; RV32I-NEXT:    addi a1, sp, 10
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_2
; RV32I-NEXT:    lh a1, 10(sp)
; RV32I-NEXT:    beqz a0, .LBB1_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    .cfi_restore s3
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i16:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a4, a0, 3
; RV32IA-NEXT:    lui a3, 16
; RV32IA-NEXT:    andi a0, a4, 24
; RV32IA-NEXT:    addi a3, a3, -1
; RV32IA-NEXT:    lw a6, 0(a2)
; RV32IA-NEXT:    sll a4, a3, a4
; RV32IA-NEXT:    not a4, a4
; RV32IA-NEXT:    and a5, a1, a3
; RV32IA-NEXT:  .LBB1_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB1_3 Depth 2
; RV32IA-NEXT:    mv a7, a6
; RV32IA-NEXT:    srl a6, a6, a0
; RV32IA-NEXT:    and t0, a6, a3
; RV32IA-NEXT:    sltu t0, t0, a5
; RV32IA-NEXT:    addi t0, t0, -1
; RV32IA-NEXT:    and t0, t0, a1
; RV32IA-NEXT:    sub a6, a6, t0
; RV32IA-NEXT:    and a6, a6, a3
; RV32IA-NEXT:    sll a6, a6, a0
; RV32IA-NEXT:    and t0, a7, a4
; RV32IA-NEXT:    or t0, t0, a6
; RV32IA-NEXT:  .LBB1_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB1_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a6, (a2)
; RV32IA-NEXT:    bne a6, a7, .LBB1_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
; RV32IA-NEXT:    sc.w.rl t1, t0, (a2)
; RV32IA-NEXT:    bnez t1, .LBB1_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a6, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    .cfi_offset s3, -40
; RV64I-NEXT:    mv s0, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    lhu a1, 0(a0)
; RV64I-NEXT:    lui s2, 16
; RV64I-NEXT:    addiw s2, s2, -1
; RV64I-NEXT:    and s3, s0, s2
; RV64I-NEXT:  .LBB1_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    and a0, a1, s2
; RV64I-NEXT:    sltu a0, a0, s3
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a0, a0, s0
; RV64I-NEXT:    sub a2, a1, a0
; RV64I-NEXT:    sh a1, 6(sp)
; RV64I-NEXT:    addi a1, sp, 6
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    call __atomic_compare_exchange_2
; RV64I-NEXT:    lh a1, 6(sp)
; RV64I-NEXT:    beqz a0, .LBB1_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    .cfi_restore s3
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i16:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a5, a0, 3
; RV64IA-NEXT:    lui a3, 16
; RV64IA-NEXT:    andi a0, a5, 24
; RV64IA-NEXT:    addiw a3, a3, -1
; RV64IA-NEXT:    lw a4, 0(a2)
; RV64IA-NEXT:    sllw a5, a3, a5
; RV64IA-NEXT:    not a5, a5
; RV64IA-NEXT:    and a6, a1, a3
; RV64IA-NEXT:  .LBB1_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB1_3 Depth 2
; RV64IA-NEXT:    srlw a7, a4, a0
; RV64IA-NEXT:    sext.w t0, a4
; RV64IA-NEXT:    and t1, a7, a3
; RV64IA-NEXT:    sltu t1, t1, a6
; RV64IA-NEXT:    addi t1, t1, -1
; RV64IA-NEXT:    and t1, t1, a1
; RV64IA-NEXT:    subw a7, a7, t1
; RV64IA-NEXT:    and a7, a7, a3
; RV64IA-NEXT:    sllw a7, a7, a0
; RV64IA-NEXT:    and a4, a4, a5
; RV64IA-NEXT:    or a7, a4, a7
; RV64IA-NEXT:  .LBB1_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB1_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
; RV64IA-NEXT:    bne a4, t0, .LBB1_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB1_3 Depth=2
; RV64IA-NEXT:    sc.w.rl t1, a7, (a2)
; RV64IA-NEXT:    bnez t1, .LBB1_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a4, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
  ret i16 %result
}

define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lw a3, 0(a0)
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:  .LBB2_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sltu a0, a3, s1
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a0, a0, s1
; RV32I-NEXT:    sub a2, a3, a0
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    mv a1, sp
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_4
; RV32I-NEXT:    lw a3, 0(sp)
; RV32I-NEXT:    beqz a0, .LBB2_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i32:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    lw a2, 0(a0)
; RV32IA-NEXT:  .LBB2_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB2_3 Depth 2
; RV32IA-NEXT:    mv a3, a2
; RV32IA-NEXT:    sltu a2, a2, a1
; RV32IA-NEXT:    addi a2, a2, -1
; RV32IA-NEXT:    and a2, a2, a1
; RV32IA-NEXT:    sub a4, a3, a2
; RV32IA-NEXT:  .LBB2_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB2_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
; RV32IA-NEXT:    bne a2, a3, .LBB2_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
; RV32IA-NEXT:    bnez a5, .LBB2_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a2
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lw a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    sext.w s2, a1
; RV64I-NEXT:  .LBB2_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sltu a0, a3, s2
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a0, a0, s1
; RV64I-NEXT:    subw a2, a3, a0
; RV64I-NEXT:    sw a3, 12(sp)
; RV64I-NEXT:    addi a1, sp, 12
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_4
; RV64I-NEXT:    lw a3, 12(sp)
; RV64I-NEXT:    beqz a0, .LBB2_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i32:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    lw a2, 0(a0)
; RV64IA-NEXT:    sext.w a3, a1
; RV64IA-NEXT:  .LBB2_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB2_3 Depth 2
; RV64IA-NEXT:    sext.w a4, a2
; RV64IA-NEXT:    sltu a5, a4, a3
; RV64IA-NEXT:    addi a5, a5, -1
; RV64IA-NEXT:    and a5, a5, a1
; RV64IA-NEXT:    subw a5, a2, a5
; RV64IA-NEXT:  .LBB2_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB2_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a4, .LBB2_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB2_3 Depth=2
; RV64IA-NEXT:    sc.w.rl a6, a5, (a0)
; RV64IA-NEXT:    bnez a6, .LBB2_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_cond ptr %ptr, i32 %val seq_cst
  ret i32 %result
}

define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_usub_cond_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lw a4, 0(a0)
; RV32I-NEXT:    lw a5, 4(a0)
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    j .LBB3_3
; RV32I-NEXT:  .LBB3_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT:    sltu a0, a5, s0
; RV32I-NEXT:  .LBB3_2: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT:    xori a0, a0, 1
; RV32I-NEXT:    neg a0, a0
; RV32I-NEXT:    and a1, a0, s2
; RV32I-NEXT:    and a0, a0, s0
; RV32I-NEXT:    sltu a3, a4, a1
; RV32I-NEXT:    sub a0, a5, a0
; RV32I-NEXT:    sub a2, a4, a1
; RV32I-NEXT:    sub a3, a0, a3
; RV32I-NEXT:    sw a4, 8(sp)
; RV32I-NEXT:    sw a5, 12(sp)
; RV32I-NEXT:    addi a1, sp, 8
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    li a5, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_8
; RV32I-NEXT:    lw a4, 8(sp)
; RV32I-NEXT:    lw a5, 12(sp)
; RV32I-NEXT:    bnez a0, .LBB3_5
; RV32I-NEXT:  .LBB3_3: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    bne a5, s0, .LBB3_1
; RV32I-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32I-NEXT:    sltu a0, a4, s2
; RV32I-NEXT:    j .LBB3_2
; RV32I-NEXT:  .LBB3_5: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a4
; RV32I-NEXT:    mv a1, a5
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_cond_i64:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    addi sp, sp, -32
; RV32IA-NEXT:    .cfi_def_cfa_offset 32
; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    .cfi_offset ra, -4
; RV32IA-NEXT:    .cfi_offset s0, -8
; RV32IA-NEXT:    .cfi_offset s1, -12
; RV32IA-NEXT:    .cfi_offset s2, -16
; RV32IA-NEXT:    mv s0, a2
; RV32IA-NEXT:    mv s1, a0
; RV32IA-NEXT:    lw a4, 0(a0)
; RV32IA-NEXT:    lw a5, 4(a0)
; RV32IA-NEXT:    mv s2, a1
; RV32IA-NEXT:    j .LBB3_3
; RV32IA-NEXT:  .LBB3_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT:    sltu a0, a5, s0
; RV32IA-NEXT:  .LBB3_2: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT:    xori a0, a0, 1
; RV32IA-NEXT:    neg a0, a0
; RV32IA-NEXT:    and a1, a0, s2
; RV32IA-NEXT:    and a0, a0, s0
; RV32IA-NEXT:    sltu a3, a4, a1
; RV32IA-NEXT:    sub a0, a5, a0
; RV32IA-NEXT:    sub a2, a4, a1
; RV32IA-NEXT:    sub a3, a0, a3
; RV32IA-NEXT:    sw a4, 8(sp)
; RV32IA-NEXT:    sw a5, 12(sp)
; RV32IA-NEXT:    addi a1, sp, 8
; RV32IA-NEXT:    li a4, 5
; RV32IA-NEXT:    li a5, 5
; RV32IA-NEXT:    mv a0, s1
; RV32IA-NEXT:    call __atomic_compare_exchange_8
; RV32IA-NEXT:    lw a4, 8(sp)
; RV32IA-NEXT:    lw a5, 12(sp)
; RV32IA-NEXT:    bnez a0, .LBB3_5
; RV32IA-NEXT:  .LBB3_3: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT:    bne a5, s0, .LBB3_1
; RV32IA-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32IA-NEXT:    sltu a0, a4, s2
; RV32IA-NEXT:    j .LBB3_2
; RV32IA-NEXT:  .LBB3_5: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a4
; RV32IA-NEXT:    mv a1, a5
; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    .cfi_restore ra
; RV32IA-NEXT:    .cfi_restore s0
; RV32IA-NEXT:    .cfi_restore s1
; RV32IA-NEXT:    .cfi_restore s2
; RV32IA-NEXT:    addi sp, sp, 32
; RV32IA-NEXT:    .cfi_def_cfa_offset 0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_cond_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    ld a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:  .LBB3_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sltu a0, a3, s1
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a0, a0, s1
; RV64I-NEXT:    sub a2, a3, a0
; RV64I-NEXT:    sd a3, 0(sp)
; RV64I-NEXT:    mv a1, sp
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_8
; RV64I-NEXT:    ld a3, 0(sp)
; RV64I-NEXT:    beqz a0, .LBB3_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_cond_i64:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    ld a2, 0(a0)
; RV64IA-NEXT:  .LBB3_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB3_3 Depth 2
; RV64IA-NEXT:    mv a3, a2
; RV64IA-NEXT:    sltu a2, a2, a1
; RV64IA-NEXT:    addi a2, a2, -1
; RV64IA-NEXT:    and a2, a2, a1
; RV64IA-NEXT:    sub a4, a3, a2
; RV64IA-NEXT:  .LBB3_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB3_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a3, .LBB3_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB3_3 Depth=2
; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
; RV64IA-NEXT:    bnez a5, .LBB3_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst
  ret i64 %result
}

define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i8:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lbu a3, 0(a0)
; RV32I-NEXT:    andi s1, a1, 255
; RV32I-NEXT:  .LBB4_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    andi a0, a3, 255
; RV32I-NEXT:    sub a1, a0, s1
; RV32I-NEXT:    sltu a0, a0, a1
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a2, a0, a1
; RV32I-NEXT:    sb a3, 3(sp)
; RV32I-NEXT:    addi a1, sp, 3
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_1
; RV32I-NEXT:    lbu a3, 3(sp)
; RV32I-NEXT:    beqz a0, .LBB4_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i8:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a0, a0, 3
; RV32IA-NEXT:    li a3, 255
; RV32IA-NEXT:    sll a3, a3, a0
; RV32IA-NEXT:    lw a4, 0(a2)
; RV32IA-NEXT:    andi a0, a0, 24
; RV32IA-NEXT:    not a3, a3
; RV32IA-NEXT:    andi a1, a1, 255
; RV32IA-NEXT:  .LBB4_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB4_3 Depth 2
; RV32IA-NEXT:    mv a5, a4
; RV32IA-NEXT:    srl a4, a4, a0
; RV32IA-NEXT:    andi a4, a4, 255
; RV32IA-NEXT:    sub a6, a4, a1
; RV32IA-NEXT:    sltu a4, a4, a6
; RV32IA-NEXT:    addi a4, a4, -1
; RV32IA-NEXT:    and a4, a4, a6
; RV32IA-NEXT:    sll a4, a4, a0
; RV32IA-NEXT:    and a6, a5, a3
; RV32IA-NEXT:    or a6, a6, a4
; RV32IA-NEXT:  .LBB4_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB4_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
; RV32IA-NEXT:    bne a4, a5, .LBB4_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
; RV32IA-NEXT:    sc.w.rl a7, a6, (a2)
; RV32IA-NEXT:    bnez a7, .LBB4_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a4, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lbu a3, 0(a0)
; RV64I-NEXT:    andi s1, a1, 255
; RV64I-NEXT:  .LBB4_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    andi a0, a3, 255
; RV64I-NEXT:    sub a1, a0, s1
; RV64I-NEXT:    sltu a0, a0, a1
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    sb a3, 7(sp)
; RV64I-NEXT:    addi a1, sp, 7
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_1
; RV64I-NEXT:    lbu a3, 7(sp)
; RV64I-NEXT:    beqz a0, .LBB4_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i8:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a0, a0, 3
; RV64IA-NEXT:    li a3, 255
; RV64IA-NEXT:    sllw a4, a3, a0
; RV64IA-NEXT:    lw a3, 0(a2)
; RV64IA-NEXT:    andi a0, a0, 24
; RV64IA-NEXT:    not a4, a4
; RV64IA-NEXT:    andi a1, a1, 255
; RV64IA-NEXT:  .LBB4_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB4_3 Depth 2
; RV64IA-NEXT:    srlw a5, a3, a0
; RV64IA-NEXT:    sext.w a6, a3
; RV64IA-NEXT:    andi a5, a5, 255
; RV64IA-NEXT:    sub a7, a5, a1
; RV64IA-NEXT:    sltu a5, a5, a7
; RV64IA-NEXT:    addi a5, a5, -1
; RV64IA-NEXT:    and a5, a5, a7
; RV64IA-NEXT:    sllw a5, a5, a0
; RV64IA-NEXT:    and a3, a3, a4
; RV64IA-NEXT:    or a5, a3, a5
; RV64IA-NEXT:  .LBB4_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB4_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
; RV64IA-NEXT:    bne a3, a6, .LBB4_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB4_3 Depth=2
; RV64IA-NEXT:    sc.w.rl a7, a5, (a2)
; RV64IA-NEXT:    bnez a7, .LBB4_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a3, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
  ret i8 %result
}

define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lhu a3, 0(a0)
; RV32I-NEXT:    lui s1, 16
; RV32I-NEXT:    addi s1, s1, -1
; RV32I-NEXT:    and s2, a1, s1
; RV32I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    and a0, a3, s1
; RV32I-NEXT:    sub a1, a0, s2
; RV32I-NEXT:    sltu a0, a0, a1
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a2, a0, a1
; RV32I-NEXT:    sh a3, 14(sp)
; RV32I-NEXT:    addi a1, sp, 14
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_2
; RV32I-NEXT:    lh a3, 14(sp)
; RV32I-NEXT:    beqz a0, .LBB5_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i16:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    andi a2, a0, -4
; RV32IA-NEXT:    slli a4, a0, 3
; RV32IA-NEXT:    lui a3, 16
; RV32IA-NEXT:    andi a0, a4, 24
; RV32IA-NEXT:    addi a3, a3, -1
; RV32IA-NEXT:    lw a5, 0(a2)
; RV32IA-NEXT:    sll a4, a3, a4
; RV32IA-NEXT:    not a4, a4
; RV32IA-NEXT:    and a1, a1, a3
; RV32IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB5_3 Depth 2
; RV32IA-NEXT:    mv a6, a5
; RV32IA-NEXT:    srl a5, a5, a0
; RV32IA-NEXT:    and a5, a5, a3
; RV32IA-NEXT:    sub a7, a5, a1
; RV32IA-NEXT:    sltu a5, a5, a7
; RV32IA-NEXT:    addi a5, a5, -1
; RV32IA-NEXT:    and a5, a5, a7
; RV32IA-NEXT:    sll a5, a5, a0
; RV32IA-NEXT:    and a7, a6, a4
; RV32IA-NEXT:    or a7, a7, a5
; RV32IA-NEXT:  .LBB5_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB5_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
; RV32IA-NEXT:    bne a5, a6, .LBB5_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB5_3 Depth=2
; RV32IA-NEXT:    sc.w.rl t0, a7, (a2)
; RV32IA-NEXT:    bnez t0, .LBB5_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    srl a0, a5, a0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    .cfi_def_cfa_offset 48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    .cfi_offset s2, -32
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lhu a3, 0(a0)
; RV64I-NEXT:    lui s1, 16
; RV64I-NEXT:    addiw s1, s1, -1
; RV64I-NEXT:    and s2, a1, s1
; RV64I-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    and a0, a3, s1
; RV64I-NEXT:    sub a1, a0, s2
; RV64I-NEXT:    sltu a0, a0, a1
; RV64I-NEXT:    addi a0, a0, -1
; RV64I-NEXT:    and a2, a0, a1
; RV64I-NEXT:    sh a3, 14(sp)
; RV64I-NEXT:    addi a1, sp, 14
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_2
; RV64I-NEXT:    lh a3, 14(sp)
; RV64I-NEXT:    beqz a0, .LBB5_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    .cfi_restore s2
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i16:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    andi a2, a0, -4
; RV64IA-NEXT:    slli a5, a0, 3
; RV64IA-NEXT:    lui a3, 16
; RV64IA-NEXT:    andi a0, a5, 24
; RV64IA-NEXT:    addiw a3, a3, -1
; RV64IA-NEXT:    lw a4, 0(a2)
; RV64IA-NEXT:    sllw a5, a3, a5
; RV64IA-NEXT:    not a5, a5
; RV64IA-NEXT:    and a1, a1, a3
; RV64IA-NEXT:  .LBB5_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB5_3 Depth 2
; RV64IA-NEXT:    srlw a6, a4, a0
; RV64IA-NEXT:    sext.w a7, a4
; RV64IA-NEXT:    and a6, a6, a3
; RV64IA-NEXT:    sub t0, a6, a1
; RV64IA-NEXT:    sltu a6, a6, t0
; RV64IA-NEXT:    addi a6, a6, -1
; RV64IA-NEXT:    and a6, a6, t0
; RV64IA-NEXT:    sllw a6, a6, a0
; RV64IA-NEXT:    and a4, a4, a5
; RV64IA-NEXT:    or a6, a4, a6
; RV64IA-NEXT:  .LBB5_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB5_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
; RV64IA-NEXT:    bne a4, a7, .LBB5_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB5_3 Depth=2
; RV64IA-NEXT:    sc.w.rl t0, a6, (a2)
; RV64IA-NEXT:    bnez t0, .LBB5_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    srlw a0, a4, a0
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
  ret i16 %result
}

define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    .cfi_def_cfa_offset 16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    lw a3, 0(a0)
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sub a0, a3, s1
; RV32I-NEXT:    sltu a1, a3, a0
; RV32I-NEXT:    addi a1, a1, -1
; RV32I-NEXT:    and a2, a1, a0
; RV32I-NEXT:    sw a3, 0(sp)
; RV32I-NEXT:    mv a1, sp
; RV32I-NEXT:    li a3, 5
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __atomic_compare_exchange_4
; RV32I-NEXT:    lw a3, 0(sp)
; RV32I-NEXT:    beqz a0, .LBB6_1
; RV32I-NEXT:  # %bb.2: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a3
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i32:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    lw a2, 0(a0)
; RV32IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Loop Header: Depth=1
; RV32IA-NEXT:    # Child Loop BB6_3 Depth 2
; RV32IA-NEXT:    mv a3, a2
; RV32IA-NEXT:    sub a2, a2, a1
; RV32IA-NEXT:    sltu a4, a3, a2
; RV32IA-NEXT:    addi a4, a4, -1
; RV32IA-NEXT:    and a4, a4, a2
; RV32IA-NEXT:  .LBB6_3: # %atomicrmw.start
; RV32IA-NEXT:    # Parent Loop BB6_1 Depth=1
; RV32IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV32IA-NEXT:    lr.w.aqrl a2, (a0)
; RV32IA-NEXT:    bne a2, a3, .LBB6_1
; RV32IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB6_3 Depth=2
; RV32IA-NEXT:    sc.w.rl a5, a4, (a0)
; RV32IA-NEXT:    bnez a5, .LBB6_3
; RV32IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV32IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a2
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    lw a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    subw a0, a3, s1
; RV64I-NEXT:    sltu a1, a3, a0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a2, a1, a0
; RV64I-NEXT:    sw a3, 4(sp)
; RV64I-NEXT:    addi a1, sp, 4
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_4
; RV64I-NEXT:    lw a3, 4(sp)
; RV64I-NEXT:    beqz a0, .LBB6_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i32:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    lw a2, 0(a0)
; RV64IA-NEXT:  .LBB6_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB6_3 Depth 2
; RV64IA-NEXT:    subw a3, a2, a1
; RV64IA-NEXT:    sext.w a4, a2
; RV64IA-NEXT:    sltu a2, a4, a3
; RV64IA-NEXT:    addi a2, a2, -1
; RV64IA-NEXT:    and a3, a2, a3
; RV64IA-NEXT:  .LBB6_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB6_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.w.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a4, .LBB6_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB6_3 Depth=2
; RV64IA-NEXT:    sc.w.rl a5, a3, (a0)
; RV64IA-NEXT:    bnez a5, .LBB6_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i32 %val seq_cst
  ret i32 %result
}

define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_usub_sat_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    .cfi_def_cfa_offset 32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    .cfi_offset ra, -4
; RV32I-NEXT:    .cfi_offset s0, -8
; RV32I-NEXT:    .cfi_offset s1, -12
; RV32I-NEXT:    .cfi_offset s2, -16
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    lw a4, 0(a0)
; RV32I-NEXT:    lw a5, 4(a0)
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    j .LBB7_3
; RV32I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    sltu a2, a5, a0
; RV32I-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32I-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    addi a3, a2, -1
; RV32I-NEXT:    and a2, a3, a1
; RV32I-NEXT:    and a3, a3, a0
; RV32I-NEXT:    sw a4, 8(sp)
; RV32I-NEXT:    sw a5, 12(sp)
; RV32I-NEXT:    addi a1, sp, 8
; RV32I-NEXT:    li a4, 5
; RV32I-NEXT:    li a5, 5
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    call __atomic_compare_exchange_8
; RV32I-NEXT:    lw a4, 8(sp)
; RV32I-NEXT:    lw a5, 12(sp)
; RV32I-NEXT:    bnez a0, .LBB7_5
; RV32I-NEXT:  .LBB7_3: # %atomicrmw.start
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    sltu a0, a4, s2
; RV32I-NEXT:    sub a1, a5, s0
; RV32I-NEXT:    sub a0, a1, a0
; RV32I-NEXT:    sub a1, a4, s2
; RV32I-NEXT:    bne a0, a5, .LBB7_1
; RV32I-NEXT:  # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32I-NEXT:    sltu a2, a4, a1
; RV32I-NEXT:    j .LBB7_2
; RV32I-NEXT:  .LBB7_5: # %atomicrmw.end
; RV32I-NEXT:    mv a0, a4
; RV32I-NEXT:    mv a1, a5
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    .cfi_restore ra
; RV32I-NEXT:    .cfi_restore s0
; RV32I-NEXT:    .cfi_restore s1
; RV32I-NEXT:    .cfi_restore s2
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    .cfi_def_cfa_offset 0
; RV32I-NEXT:    ret
;
; RV32IA-LABEL: atomicrmw_usub_sat_i64:
; RV32IA:       # %bb.0:
; RV32IA-NEXT:    addi sp, sp, -32
; RV32IA-NEXT:    .cfi_def_cfa_offset 32
; RV32IA-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT:    .cfi_offset ra, -4
; RV32IA-NEXT:    .cfi_offset s0, -8
; RV32IA-NEXT:    .cfi_offset s1, -12
; RV32IA-NEXT:    .cfi_offset s2, -16
; RV32IA-NEXT:    mv s0, a2
; RV32IA-NEXT:    mv s1, a0
; RV32IA-NEXT:    lw a4, 0(a0)
; RV32IA-NEXT:    lw a5, 4(a0)
; RV32IA-NEXT:    mv s2, a1
; RV32IA-NEXT:    j .LBB7_3
; RV32IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    sltu a2, a5, a0
; RV32IA-NEXT:  .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT:    # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    addi a3, a2, -1
; RV32IA-NEXT:    and a2, a3, a1
; RV32IA-NEXT:    and a3, a3, a0
; RV32IA-NEXT:    sw a4, 8(sp)
; RV32IA-NEXT:    sw a5, 12(sp)
; RV32IA-NEXT:    addi a1, sp, 8
; RV32IA-NEXT:    li a4, 5
; RV32IA-NEXT:    li a5, 5
; RV32IA-NEXT:    mv a0, s1
; RV32IA-NEXT:    call __atomic_compare_exchange_8
; RV32IA-NEXT:    lw a4, 8(sp)
; RV32IA-NEXT:    lw a5, 12(sp)
; RV32IA-NEXT:    bnez a0, .LBB7_5
; RV32IA-NEXT:  .LBB7_3: # %atomicrmw.start
; RV32IA-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT:    sltu a0, a4, s2
; RV32IA-NEXT:    sub a1, a5, s0
; RV32IA-NEXT:    sub a0, a1, a0
; RV32IA-NEXT:    sub a1, a4, s2
; RV32IA-NEXT:    bne a0, a5, .LBB7_1
; RV32IA-NEXT:  # %bb.4: # in Loop: Header=BB7_3 Depth=1
; RV32IA-NEXT:    sltu a2, a4, a1
; RV32IA-NEXT:    j .LBB7_2
; RV32IA-NEXT:  .LBB7_5: # %atomicrmw.end
; RV32IA-NEXT:    mv a0, a4
; RV32IA-NEXT:    mv a1, a5
; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT:    .cfi_restore ra
; RV32IA-NEXT:    .cfi_restore s0
; RV32IA-NEXT:    .cfi_restore s1
; RV32IA-NEXT:    .cfi_restore s2
; RV32IA-NEXT:    addi sp, sp, 32
; RV32IA-NEXT:    .cfi_def_cfa_offset 0
; RV32IA-NEXT:    ret
;
; RV64I-LABEL: atomicrmw_usub_sat_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    .cfi_def_cfa_offset 32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    .cfi_offset ra, -8
; RV64I-NEXT:    .cfi_offset s0, -16
; RV64I-NEXT:    .cfi_offset s1, -24
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    ld a3, 0(a0)
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    sub a0, a3, s1
; RV64I-NEXT:    sltu a1, a3, a0
; RV64I-NEXT:    addi a1, a1, -1
; RV64I-NEXT:    and a2, a1, a0
; RV64I-NEXT:    sd a3, 0(sp)
; RV64I-NEXT:    mv a1, sp
; RV64I-NEXT:    li a3, 5
; RV64I-NEXT:    li a4, 5
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __atomic_compare_exchange_8
; RV64I-NEXT:    ld a3, 0(sp)
; RV64I-NEXT:    beqz a0, .LBB7_1
; RV64I-NEXT:  # %bb.2: # %atomicrmw.end
; RV64I-NEXT:    mv a0, a3
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    .cfi_restore ra
; RV64I-NEXT:    .cfi_restore s0
; RV64I-NEXT:    .cfi_restore s1
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    .cfi_def_cfa_offset 0
; RV64I-NEXT:    ret
;
; RV64IA-LABEL: atomicrmw_usub_sat_i64:
; RV64IA:       # %bb.0:
; RV64IA-NEXT:    ld a2, 0(a0)
; RV64IA-NEXT:  .LBB7_1: # %atomicrmw.start
; RV64IA-NEXT:    # =>This Loop Header: Depth=1
; RV64IA-NEXT:    # Child Loop BB7_3 Depth 2
; RV64IA-NEXT:    mv a3, a2
; RV64IA-NEXT:    sub a2, a2, a1
; RV64IA-NEXT:    sltu a4, a3, a2
; RV64IA-NEXT:    addi a4, a4, -1
; RV64IA-NEXT:    and a4, a4, a2
; RV64IA-NEXT:  .LBB7_3: # %atomicrmw.start
; RV64IA-NEXT:    # Parent Loop BB7_1 Depth=1
; RV64IA-NEXT:    # => This Inner Loop Header: Depth=2
; RV64IA-NEXT:    lr.d.aqrl a2, (a0)
; RV64IA-NEXT:    bne a2, a3, .LBB7_1
; RV64IA-NEXT:  # %bb.4: # %atomicrmw.start
; RV64IA-NEXT:    # in Loop: Header=BB7_3 Depth=2
; RV64IA-NEXT:    sc.d.rl a5, a4, (a0)
; RV64IA-NEXT:    bnez a5, .LBB7_3
; RV64IA-NEXT:  # %bb.5: # %atomicrmw.start
; RV64IA-NEXT:  # %bb.2: # %atomicrmw.end
; RV64IA-NEXT:    mv a0, a2
; RV64IA-NEXT:    ret
  %result = atomicrmw usub_sat ptr %ptr, i64 %val seq_cst
  ret i64 %result
}