1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ 3; RUN: | FileCheck -check-prefix=RV32I %s 4; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \ 5; RUN: | FileCheck -check-prefix=RV32IA %s 6; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso -verify-machineinstrs < %s \ 7; RUN: | FileCheck -check-prefix=RV32IA %s 8; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ 9; RUN: | FileCheck -check-prefix=RV64I %s 10; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \ 11; RUN: | FileCheck -check-prefix=RV64IA %s 12; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso -verify-machineinstrs < %s \ 13; RUN: | FileCheck -check-prefix=RV64IA %s 14 15 16define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { 17; RV32I-LABEL: atomicrmw_uinc_wrap_i8: 18; RV32I: # %bb.0: 19; RV32I-NEXT: addi sp, sp, -16 20; RV32I-NEXT: .cfi_def_cfa_offset 16 21; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 22; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill 23; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill 24; RV32I-NEXT: .cfi_offset ra, -4 25; RV32I-NEXT: .cfi_offset s0, -8 26; RV32I-NEXT: .cfi_offset s1, -12 27; RV32I-NEXT: mv s0, a0 28; RV32I-NEXT: lbu a3, 0(a0) 29; RV32I-NEXT: andi s1, a1, 255 30; RV32I-NEXT: .LBB0_1: # %atomicrmw.start 31; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 32; RV32I-NEXT: addi a0, a3, 1 33; RV32I-NEXT: andi a1, a3, 255 34; RV32I-NEXT: sltu a1, a1, s1 35; RV32I-NEXT: neg a2, a1 36; RV32I-NEXT: and a2, a2, a0 37; RV32I-NEXT: sb a3, 3(sp) 38; RV32I-NEXT: addi a1, sp, 3 39; RV32I-NEXT: li a3, 5 40; RV32I-NEXT: li a4, 5 41; RV32I-NEXT: mv a0, s0 42; RV32I-NEXT: call __atomic_compare_exchange_1 43; RV32I-NEXT: lbu a3, 3(sp) 44; RV32I-NEXT: beqz a0, .LBB0_1 45; RV32I-NEXT: # %bb.2: # %atomicrmw.end 46; RV32I-NEXT: mv a0, a3 47; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 48; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload 49; RV32I-NEXT: lw s1, 4(sp) # 4-byte 
Folded Reload 50; RV32I-NEXT: .cfi_restore ra 51; RV32I-NEXT: .cfi_restore s0 52; RV32I-NEXT: .cfi_restore s1 53; RV32I-NEXT: addi sp, sp, 16 54; RV32I-NEXT: .cfi_def_cfa_offset 0 55; RV32I-NEXT: ret 56; 57; RV32IA-LABEL: atomicrmw_uinc_wrap_i8: 58; RV32IA: # %bb.0: 59; RV32IA-NEXT: andi a2, a0, -4 60; RV32IA-NEXT: slli a0, a0, 3 61; RV32IA-NEXT: li a3, 255 62; RV32IA-NEXT: sll a3, a3, a0 63; RV32IA-NEXT: lw a4, 0(a2) 64; RV32IA-NEXT: andi a0, a0, 24 65; RV32IA-NEXT: not a3, a3 66; RV32IA-NEXT: andi a1, a1, 255 67; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start 68; RV32IA-NEXT: # =>This Loop Header: Depth=1 69; RV32IA-NEXT: # Child Loop BB0_3 Depth 2 70; RV32IA-NEXT: mv a5, a4 71; RV32IA-NEXT: srl a4, a4, a0 72; RV32IA-NEXT: andi a6, a4, 255 73; RV32IA-NEXT: addi a4, a4, 1 74; RV32IA-NEXT: sltu a6, a6, a1 75; RV32IA-NEXT: neg a6, a6 76; RV32IA-NEXT: and a4, a6, a4 77; RV32IA-NEXT: andi a4, a4, 255 78; RV32IA-NEXT: sll a4, a4, a0 79; RV32IA-NEXT: and a6, a5, a3 80; RV32IA-NEXT: or a6, a6, a4 81; RV32IA-NEXT: .LBB0_3: # %atomicrmw.start 82; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1 83; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 84; RV32IA-NEXT: lr.w.aqrl a4, (a2) 85; RV32IA-NEXT: bne a4, a5, .LBB0_1 86; RV32IA-NEXT: # %bb.4: # %atomicrmw.start 87; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2 88; RV32IA-NEXT: sc.w.rl a7, a6, (a2) 89; RV32IA-NEXT: bnez a7, .LBB0_3 90; RV32IA-NEXT: # %bb.5: # %atomicrmw.start 91; RV32IA-NEXT: # %bb.2: # %atomicrmw.end 92; RV32IA-NEXT: srl a0, a4, a0 93; RV32IA-NEXT: ret 94; 95; RV64I-LABEL: atomicrmw_uinc_wrap_i8: 96; RV64I: # %bb.0: 97; RV64I-NEXT: addi sp, sp, -32 98; RV64I-NEXT: .cfi_def_cfa_offset 32 99; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill 100; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill 101; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill 102; RV64I-NEXT: .cfi_offset ra, -8 103; RV64I-NEXT: .cfi_offset s0, -16 104; RV64I-NEXT: .cfi_offset s1, -24 105; RV64I-NEXT: mv s0, a0 106; RV64I-NEXT: lbu a3, 0(a0) 107; 
RV64I-NEXT: andi s1, a1, 255 108; RV64I-NEXT: .LBB0_1: # %atomicrmw.start 109; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 110; RV64I-NEXT: addi a0, a3, 1 111; RV64I-NEXT: andi a1, a3, 255 112; RV64I-NEXT: sltu a1, a1, s1 113; RV64I-NEXT: neg a2, a1 114; RV64I-NEXT: and a2, a2, a0 115; RV64I-NEXT: sb a3, 7(sp) 116; RV64I-NEXT: addi a1, sp, 7 117; RV64I-NEXT: li a3, 5 118; RV64I-NEXT: li a4, 5 119; RV64I-NEXT: mv a0, s0 120; RV64I-NEXT: call __atomic_compare_exchange_1 121; RV64I-NEXT: lbu a3, 7(sp) 122; RV64I-NEXT: beqz a0, .LBB0_1 123; RV64I-NEXT: # %bb.2: # %atomicrmw.end 124; RV64I-NEXT: mv a0, a3 125; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload 126; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload 127; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload 128; RV64I-NEXT: .cfi_restore ra 129; RV64I-NEXT: .cfi_restore s0 130; RV64I-NEXT: .cfi_restore s1 131; RV64I-NEXT: addi sp, sp, 32 132; RV64I-NEXT: .cfi_def_cfa_offset 0 133; RV64I-NEXT: ret 134; 135; RV64IA-LABEL: atomicrmw_uinc_wrap_i8: 136; RV64IA: # %bb.0: 137; RV64IA-NEXT: andi a2, a0, -4 138; RV64IA-NEXT: slli a0, a0, 3 139; RV64IA-NEXT: li a3, 255 140; RV64IA-NEXT: sllw a4, a3, a0 141; RV64IA-NEXT: lw a3, 0(a2) 142; RV64IA-NEXT: andi a0, a0, 24 143; RV64IA-NEXT: not a4, a4 144; RV64IA-NEXT: andi a1, a1, 255 145; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start 146; RV64IA-NEXT: # =>This Loop Header: Depth=1 147; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 148; RV64IA-NEXT: srlw a5, a3, a0 149; RV64IA-NEXT: sext.w a6, a3 150; RV64IA-NEXT: andi a7, a5, 255 151; RV64IA-NEXT: addi a5, a5, 1 152; RV64IA-NEXT: sltu a7, a7, a1 153; RV64IA-NEXT: negw a7, a7 154; RV64IA-NEXT: and a5, a7, a5 155; RV64IA-NEXT: andi a5, a5, 255 156; RV64IA-NEXT: sllw a5, a5, a0 157; RV64IA-NEXT: and a3, a3, a4 158; RV64IA-NEXT: or a5, a3, a5 159; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start 160; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 161; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 162; RV64IA-NEXT: lr.w.aqrl a3, (a2) 163; RV64IA-NEXT: 
bne a3, a6, .LBB0_1 164; RV64IA-NEXT: # %bb.4: # %atomicrmw.start 165; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 166; RV64IA-NEXT: sc.w.rl a7, a5, (a2) 167; RV64IA-NEXT: bnez a7, .LBB0_3 168; RV64IA-NEXT: # %bb.5: # %atomicrmw.start 169; RV64IA-NEXT: # %bb.2: # %atomicrmw.end 170; RV64IA-NEXT: srlw a0, a3, a0 171; RV64IA-NEXT: ret 172 %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst 173 ret i8 %result 174} 175 176define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { 177; RV32I-LABEL: atomicrmw_uinc_wrap_i16: 178; RV32I: # %bb.0: 179; RV32I-NEXT: addi sp, sp, -32 180; RV32I-NEXT: .cfi_def_cfa_offset 32 181; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill 182; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill 183; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill 184; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill 185; RV32I-NEXT: .cfi_offset ra, -4 186; RV32I-NEXT: .cfi_offset s0, -8 187; RV32I-NEXT: .cfi_offset s1, -12 188; RV32I-NEXT: .cfi_offset s2, -16 189; RV32I-NEXT: mv s0, a0 190; RV32I-NEXT: lhu a3, 0(a0) 191; RV32I-NEXT: lui s1, 16 192; RV32I-NEXT: addi s1, s1, -1 193; RV32I-NEXT: and s2, a1, s1 194; RV32I-NEXT: .LBB1_1: # %atomicrmw.start 195; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 196; RV32I-NEXT: and a0, a3, s1 197; RV32I-NEXT: addi a1, a3, 1 198; RV32I-NEXT: sltu a0, a0, s2 199; RV32I-NEXT: neg a2, a0 200; RV32I-NEXT: and a2, a2, a1 201; RV32I-NEXT: sh a3, 14(sp) 202; RV32I-NEXT: addi a1, sp, 14 203; RV32I-NEXT: li a3, 5 204; RV32I-NEXT: li a4, 5 205; RV32I-NEXT: mv a0, s0 206; RV32I-NEXT: call __atomic_compare_exchange_2 207; RV32I-NEXT: lh a3, 14(sp) 208; RV32I-NEXT: beqz a0, .LBB1_1 209; RV32I-NEXT: # %bb.2: # %atomicrmw.end 210; RV32I-NEXT: mv a0, a3 211; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload 212; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload 213; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload 214; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload 215; RV32I-NEXT: .cfi_restore ra 216; RV32I-NEXT: .cfi_restore s0 
217; RV32I-NEXT: .cfi_restore s1 218; RV32I-NEXT: .cfi_restore s2 219; RV32I-NEXT: addi sp, sp, 32 220; RV32I-NEXT: .cfi_def_cfa_offset 0 221; RV32I-NEXT: ret 222; 223; RV32IA-LABEL: atomicrmw_uinc_wrap_i16: 224; RV32IA: # %bb.0: 225; RV32IA-NEXT: andi a2, a0, -4 226; RV32IA-NEXT: slli a4, a0, 3 227; RV32IA-NEXT: lui a3, 16 228; RV32IA-NEXT: andi a0, a4, 24 229; RV32IA-NEXT: addi a3, a3, -1 230; RV32IA-NEXT: lw a5, 0(a2) 231; RV32IA-NEXT: sll a4, a3, a4 232; RV32IA-NEXT: not a4, a4 233; RV32IA-NEXT: and a1, a1, a3 234; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start 235; RV32IA-NEXT: # =>This Loop Header: Depth=1 236; RV32IA-NEXT: # Child Loop BB1_3 Depth 2 237; RV32IA-NEXT: mv a6, a5 238; RV32IA-NEXT: srl a5, a5, a0 239; RV32IA-NEXT: and a7, a5, a3 240; RV32IA-NEXT: addi a5, a5, 1 241; RV32IA-NEXT: sltu a7, a7, a1 242; RV32IA-NEXT: and a5, a5, a3 243; RV32IA-NEXT: neg a7, a7 244; RV32IA-NEXT: and a5, a7, a5 245; RV32IA-NEXT: sll a5, a5, a0 246; RV32IA-NEXT: and a7, a6, a4 247; RV32IA-NEXT: or a7, a7, a5 248; RV32IA-NEXT: .LBB1_3: # %atomicrmw.start 249; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1 250; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 251; RV32IA-NEXT: lr.w.aqrl a5, (a2) 252; RV32IA-NEXT: bne a5, a6, .LBB1_1 253; RV32IA-NEXT: # %bb.4: # %atomicrmw.start 254; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2 255; RV32IA-NEXT: sc.w.rl t0, a7, (a2) 256; RV32IA-NEXT: bnez t0, .LBB1_3 257; RV32IA-NEXT: # %bb.5: # %atomicrmw.start 258; RV32IA-NEXT: # %bb.2: # %atomicrmw.end 259; RV32IA-NEXT: srl a0, a5, a0 260; RV32IA-NEXT: ret 261; 262; RV64I-LABEL: atomicrmw_uinc_wrap_i16: 263; RV64I: # %bb.0: 264; RV64I-NEXT: addi sp, sp, -48 265; RV64I-NEXT: .cfi_def_cfa_offset 48 266; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill 267; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill 268; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill 269; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill 270; RV64I-NEXT: .cfi_offset ra, -8 271; RV64I-NEXT: .cfi_offset s0, -16 272; RV64I-NEXT: 
.cfi_offset s1, -24 273; RV64I-NEXT: .cfi_offset s2, -32 274; RV64I-NEXT: mv s0, a0 275; RV64I-NEXT: lhu a3, 0(a0) 276; RV64I-NEXT: lui s1, 16 277; RV64I-NEXT: addiw s1, s1, -1 278; RV64I-NEXT: and s2, a1, s1 279; RV64I-NEXT: .LBB1_1: # %atomicrmw.start 280; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 281; RV64I-NEXT: and a0, a3, s1 282; RV64I-NEXT: addi a1, a3, 1 283; RV64I-NEXT: sltu a0, a0, s2 284; RV64I-NEXT: neg a2, a0 285; RV64I-NEXT: and a2, a2, a1 286; RV64I-NEXT: sh a3, 14(sp) 287; RV64I-NEXT: addi a1, sp, 14 288; RV64I-NEXT: li a3, 5 289; RV64I-NEXT: li a4, 5 290; RV64I-NEXT: mv a0, s0 291; RV64I-NEXT: call __atomic_compare_exchange_2 292; RV64I-NEXT: lh a3, 14(sp) 293; RV64I-NEXT: beqz a0, .LBB1_1 294; RV64I-NEXT: # %bb.2: # %atomicrmw.end 295; RV64I-NEXT: mv a0, a3 296; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload 297; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload 298; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload 299; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload 300; RV64I-NEXT: .cfi_restore ra 301; RV64I-NEXT: .cfi_restore s0 302; RV64I-NEXT: .cfi_restore s1 303; RV64I-NEXT: .cfi_restore s2 304; RV64I-NEXT: addi sp, sp, 48 305; RV64I-NEXT: .cfi_def_cfa_offset 0 306; RV64I-NEXT: ret 307; 308; RV64IA-LABEL: atomicrmw_uinc_wrap_i16: 309; RV64IA: # %bb.0: 310; RV64IA-NEXT: andi a2, a0, -4 311; RV64IA-NEXT: slli a5, a0, 3 312; RV64IA-NEXT: lui a3, 16 313; RV64IA-NEXT: andi a0, a5, 24 314; RV64IA-NEXT: addiw a3, a3, -1 315; RV64IA-NEXT: lw a4, 0(a2) 316; RV64IA-NEXT: sllw a5, a3, a5 317; RV64IA-NEXT: not a5, a5 318; RV64IA-NEXT: and a1, a1, a3 319; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start 320; RV64IA-NEXT: # =>This Loop Header: Depth=1 321; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 322; RV64IA-NEXT: srlw a6, a4, a0 323; RV64IA-NEXT: sext.w a7, a4 324; RV64IA-NEXT: and t0, a6, a3 325; RV64IA-NEXT: addi a6, a6, 1 326; RV64IA-NEXT: sltu t0, t0, a1 327; RV64IA-NEXT: and a6, a6, a3 328; RV64IA-NEXT: negw t0, t0 329; RV64IA-NEXT: and a6, t0, a6 
330; RV64IA-NEXT: sllw a6, a6, a0 331; RV64IA-NEXT: and a4, a4, a5 332; RV64IA-NEXT: or a6, a4, a6 333; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start 334; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 335; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 336; RV64IA-NEXT: lr.w.aqrl a4, (a2) 337; RV64IA-NEXT: bne a4, a7, .LBB1_1 338; RV64IA-NEXT: # %bb.4: # %atomicrmw.start 339; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 340; RV64IA-NEXT: sc.w.rl t0, a6, (a2) 341; RV64IA-NEXT: bnez t0, .LBB1_3 342; RV64IA-NEXT: # %bb.5: # %atomicrmw.start 343; RV64IA-NEXT: # %bb.2: # %atomicrmw.end 344; RV64IA-NEXT: srlw a0, a4, a0 345; RV64IA-NEXT: ret 346 %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst 347 ret i16 %result 348} 349 350define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { 351; RV32I-LABEL: atomicrmw_uinc_wrap_i32: 352; RV32I: # %bb.0: 353; RV32I-NEXT: addi sp, sp, -16 354; RV32I-NEXT: .cfi_def_cfa_offset 16 355; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill 356; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill 357; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill 358; RV32I-NEXT: .cfi_offset ra, -4 359; RV32I-NEXT: .cfi_offset s0, -8 360; RV32I-NEXT: .cfi_offset s1, -12 361; RV32I-NEXT: mv s0, a0 362; RV32I-NEXT: lw a3, 0(a0) 363; RV32I-NEXT: mv s1, a1 364; RV32I-NEXT: .LBB2_1: # %atomicrmw.start 365; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 366; RV32I-NEXT: addi a0, a3, 1 367; RV32I-NEXT: sltu a1, a3, s1 368; RV32I-NEXT: neg a2, a1 369; RV32I-NEXT: and a2, a2, a0 370; RV32I-NEXT: sw a3, 0(sp) 371; RV32I-NEXT: mv a1, sp 372; RV32I-NEXT: li a3, 5 373; RV32I-NEXT: li a4, 5 374; RV32I-NEXT: mv a0, s0 375; RV32I-NEXT: call __atomic_compare_exchange_4 376; RV32I-NEXT: lw a3, 0(sp) 377; RV32I-NEXT: beqz a0, .LBB2_1 378; RV32I-NEXT: # %bb.2: # %atomicrmw.end 379; RV32I-NEXT: mv a0, a3 380; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload 381; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload 382; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload 383; RV32I-NEXT: 
.cfi_restore ra 384; RV32I-NEXT: .cfi_restore s0 385; RV32I-NEXT: .cfi_restore s1 386; RV32I-NEXT: addi sp, sp, 16 387; RV32I-NEXT: .cfi_def_cfa_offset 0 388; RV32I-NEXT: ret 389; 390; RV32IA-LABEL: atomicrmw_uinc_wrap_i32: 391; RV32IA: # %bb.0: 392; RV32IA-NEXT: lw a2, 0(a0) 393; RV32IA-NEXT: .LBB2_1: # %atomicrmw.start 394; RV32IA-NEXT: # =>This Loop Header: Depth=1 395; RV32IA-NEXT: # Child Loop BB2_3 Depth 2 396; RV32IA-NEXT: mv a3, a2 397; RV32IA-NEXT: addi a2, a2, 1 398; RV32IA-NEXT: sltu a4, a3, a1 399; RV32IA-NEXT: neg a4, a4 400; RV32IA-NEXT: and a4, a4, a2 401; RV32IA-NEXT: .LBB2_3: # %atomicrmw.start 402; RV32IA-NEXT: # Parent Loop BB2_1 Depth=1 403; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 404; RV32IA-NEXT: lr.w.aqrl a2, (a0) 405; RV32IA-NEXT: bne a2, a3, .LBB2_1 406; RV32IA-NEXT: # %bb.4: # %atomicrmw.start 407; RV32IA-NEXT: # in Loop: Header=BB2_3 Depth=2 408; RV32IA-NEXT: sc.w.rl a5, a4, (a0) 409; RV32IA-NEXT: bnez a5, .LBB2_3 410; RV32IA-NEXT: # %bb.5: # %atomicrmw.start 411; RV32IA-NEXT: # %bb.2: # %atomicrmw.end 412; RV32IA-NEXT: mv a0, a2 413; RV32IA-NEXT: ret 414; 415; RV64I-LABEL: atomicrmw_uinc_wrap_i32: 416; RV64I: # %bb.0: 417; RV64I-NEXT: addi sp, sp, -32 418; RV64I-NEXT: .cfi_def_cfa_offset 32 419; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill 420; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill 421; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill 422; RV64I-NEXT: .cfi_offset ra, -8 423; RV64I-NEXT: .cfi_offset s0, -16 424; RV64I-NEXT: .cfi_offset s1, -24 425; RV64I-NEXT: mv s0, a0 426; RV64I-NEXT: lw a3, 0(a0) 427; RV64I-NEXT: sext.w s1, a1 428; RV64I-NEXT: .LBB2_1: # %atomicrmw.start 429; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 430; RV64I-NEXT: addiw a0, a3, 1 431; RV64I-NEXT: sltu a1, a3, s1 432; RV64I-NEXT: neg a2, a1 433; RV64I-NEXT: and a2, a2, a0 434; RV64I-NEXT: sw a3, 4(sp) 435; RV64I-NEXT: addi a1, sp, 4 436; RV64I-NEXT: li a3, 5 437; RV64I-NEXT: li a4, 5 438; RV64I-NEXT: mv a0, s0 439; RV64I-NEXT: call 
__atomic_compare_exchange_4 440; RV64I-NEXT: lw a3, 4(sp) 441; RV64I-NEXT: beqz a0, .LBB2_1 442; RV64I-NEXT: # %bb.2: # %atomicrmw.end 443; RV64I-NEXT: mv a0, a3 444; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload 445; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload 446; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload 447; RV64I-NEXT: .cfi_restore ra 448; RV64I-NEXT: .cfi_restore s0 449; RV64I-NEXT: .cfi_restore s1 450; RV64I-NEXT: addi sp, sp, 32 451; RV64I-NEXT: .cfi_def_cfa_offset 0 452; RV64I-NEXT: ret 453; 454; RV64IA-LABEL: atomicrmw_uinc_wrap_i32: 455; RV64IA: # %bb.0: 456; RV64IA-NEXT: lw a2, 0(a0) 457; RV64IA-NEXT: sext.w a1, a1 458; RV64IA-NEXT: .LBB2_1: # %atomicrmw.start 459; RV64IA-NEXT: # =>This Loop Header: Depth=1 460; RV64IA-NEXT: # Child Loop BB2_3 Depth 2 461; RV64IA-NEXT: addiw a3, a2, 1 462; RV64IA-NEXT: sext.w a4, a2 463; RV64IA-NEXT: sltu a2, a4, a1 464; RV64IA-NEXT: neg a2, a2 465; RV64IA-NEXT: and a3, a2, a3 466; RV64IA-NEXT: .LBB2_3: # %atomicrmw.start 467; RV64IA-NEXT: # Parent Loop BB2_1 Depth=1 468; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 469; RV64IA-NEXT: lr.w.aqrl a2, (a0) 470; RV64IA-NEXT: bne a2, a4, .LBB2_1 471; RV64IA-NEXT: # %bb.4: # %atomicrmw.start 472; RV64IA-NEXT: # in Loop: Header=BB2_3 Depth=2 473; RV64IA-NEXT: sc.w.rl a5, a3, (a0) 474; RV64IA-NEXT: bnez a5, .LBB2_3 475; RV64IA-NEXT: # %bb.5: # %atomicrmw.start 476; RV64IA-NEXT: # %bb.2: # %atomicrmw.end 477; RV64IA-NEXT: mv a0, a2 478; RV64IA-NEXT: ret 479 %result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst 480 ret i32 %result 481} 482 483define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { 484; RV32I-LABEL: atomicrmw_uinc_wrap_i64: 485; RV32I: # %bb.0: 486; RV32I-NEXT: addi sp, sp, -32 487; RV32I-NEXT: .cfi_def_cfa_offset 32 488; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill 489; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill 490; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill 491; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill 492; RV32I-NEXT: 
.cfi_offset ra, -4 493; RV32I-NEXT: .cfi_offset s0, -8 494; RV32I-NEXT: .cfi_offset s1, -12 495; RV32I-NEXT: .cfi_offset s2, -16 496; RV32I-NEXT: mv s0, a2 497; RV32I-NEXT: mv s1, a0 498; RV32I-NEXT: lw a4, 0(a0) 499; RV32I-NEXT: lw a5, 4(a0) 500; RV32I-NEXT: mv s2, a1 501; RV32I-NEXT: j .LBB3_3 502; RV32I-NEXT: .LBB3_1: # %atomicrmw.start 503; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 504; RV32I-NEXT: sltu a0, a5, s0 505; RV32I-NEXT: .LBB3_2: # %atomicrmw.start 506; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 507; RV32I-NEXT: addi a1, a4, 1 508; RV32I-NEXT: neg a0, a0 509; RV32I-NEXT: seqz a3, a1 510; RV32I-NEXT: and a2, a0, a1 511; RV32I-NEXT: add a3, a5, a3 512; RV32I-NEXT: and a3, a0, a3 513; RV32I-NEXT: sw a4, 8(sp) 514; RV32I-NEXT: sw a5, 12(sp) 515; RV32I-NEXT: addi a1, sp, 8 516; RV32I-NEXT: li a4, 5 517; RV32I-NEXT: li a5, 5 518; RV32I-NEXT: mv a0, s1 519; RV32I-NEXT: call __atomic_compare_exchange_8 520; RV32I-NEXT: lw a4, 8(sp) 521; RV32I-NEXT: lw a5, 12(sp) 522; RV32I-NEXT: bnez a0, .LBB3_5 523; RV32I-NEXT: .LBB3_3: # %atomicrmw.start 524; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 525; RV32I-NEXT: bne a5, s0, .LBB3_1 526; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 527; RV32I-NEXT: sltu a0, a4, s2 528; RV32I-NEXT: j .LBB3_2 529; RV32I-NEXT: .LBB3_5: # %atomicrmw.end 530; RV32I-NEXT: mv a0, a4 531; RV32I-NEXT: mv a1, a5 532; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload 533; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload 534; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload 535; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload 536; RV32I-NEXT: .cfi_restore ra 537; RV32I-NEXT: .cfi_restore s0 538; RV32I-NEXT: .cfi_restore s1 539; RV32I-NEXT: .cfi_restore s2 540; RV32I-NEXT: addi sp, sp, 32 541; RV32I-NEXT: .cfi_def_cfa_offset 0 542; RV32I-NEXT: ret 543; 544; RV32IA-LABEL: atomicrmw_uinc_wrap_i64: 545; RV32IA: # %bb.0: 546; RV32IA-NEXT: addi sp, sp, -32 547; RV32IA-NEXT: .cfi_def_cfa_offset 32 548; RV32IA-NEXT: sw ra, 28(sp) # 4-byte 
Folded Spill 549; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill 550; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill 551; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill 552; RV32IA-NEXT: .cfi_offset ra, -4 553; RV32IA-NEXT: .cfi_offset s0, -8 554; RV32IA-NEXT: .cfi_offset s1, -12 555; RV32IA-NEXT: .cfi_offset s2, -16 556; RV32IA-NEXT: mv s0, a2 557; RV32IA-NEXT: mv s1, a0 558; RV32IA-NEXT: lw a4, 0(a0) 559; RV32IA-NEXT: lw a5, 4(a0) 560; RV32IA-NEXT: mv s2, a1 561; RV32IA-NEXT: j .LBB3_3 562; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start 563; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 564; RV32IA-NEXT: sltu a0, a5, s0 565; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start 566; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 567; RV32IA-NEXT: addi a1, a4, 1 568; RV32IA-NEXT: neg a0, a0 569; RV32IA-NEXT: seqz a3, a1 570; RV32IA-NEXT: and a2, a0, a1 571; RV32IA-NEXT: add a3, a5, a3 572; RV32IA-NEXT: and a3, a0, a3 573; RV32IA-NEXT: sw a4, 8(sp) 574; RV32IA-NEXT: sw a5, 12(sp) 575; RV32IA-NEXT: addi a1, sp, 8 576; RV32IA-NEXT: li a4, 5 577; RV32IA-NEXT: li a5, 5 578; RV32IA-NEXT: mv a0, s1 579; RV32IA-NEXT: call __atomic_compare_exchange_8 580; RV32IA-NEXT: lw a4, 8(sp) 581; RV32IA-NEXT: lw a5, 12(sp) 582; RV32IA-NEXT: bnez a0, .LBB3_5 583; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start 584; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 585; RV32IA-NEXT: bne a5, s0, .LBB3_1 586; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 587; RV32IA-NEXT: sltu a0, a4, s2 588; RV32IA-NEXT: j .LBB3_2 589; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end 590; RV32IA-NEXT: mv a0, a4 591; RV32IA-NEXT: mv a1, a5 592; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload 593; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload 594; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload 595; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload 596; RV32IA-NEXT: .cfi_restore ra 597; RV32IA-NEXT: .cfi_restore s0 598; RV32IA-NEXT: .cfi_restore s1 599; RV32IA-NEXT: .cfi_restore s2 600; RV32IA-NEXT: addi sp, sp, 32 601; 
RV32IA-NEXT: .cfi_def_cfa_offset 0 602; RV32IA-NEXT: ret 603; 604; RV64I-LABEL: atomicrmw_uinc_wrap_i64: 605; RV64I: # %bb.0: 606; RV64I-NEXT: addi sp, sp, -32 607; RV64I-NEXT: .cfi_def_cfa_offset 32 608; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill 609; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill 610; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill 611; RV64I-NEXT: .cfi_offset ra, -8 612; RV64I-NEXT: .cfi_offset s0, -16 613; RV64I-NEXT: .cfi_offset s1, -24 614; RV64I-NEXT: mv s0, a0 615; RV64I-NEXT: ld a3, 0(a0) 616; RV64I-NEXT: mv s1, a1 617; RV64I-NEXT: .LBB3_1: # %atomicrmw.start 618; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 619; RV64I-NEXT: addi a0, a3, 1 620; RV64I-NEXT: sltu a1, a3, s1 621; RV64I-NEXT: neg a2, a1 622; RV64I-NEXT: and a2, a2, a0 623; RV64I-NEXT: sd a3, 0(sp) 624; RV64I-NEXT: mv a1, sp 625; RV64I-NEXT: li a3, 5 626; RV64I-NEXT: li a4, 5 627; RV64I-NEXT: mv a0, s0 628; RV64I-NEXT: call __atomic_compare_exchange_8 629; RV64I-NEXT: ld a3, 0(sp) 630; RV64I-NEXT: beqz a0, .LBB3_1 631; RV64I-NEXT: # %bb.2: # %atomicrmw.end 632; RV64I-NEXT: mv a0, a3 633; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload 634; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload 635; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload 636; RV64I-NEXT: .cfi_restore ra 637; RV64I-NEXT: .cfi_restore s0 638; RV64I-NEXT: .cfi_restore s1 639; RV64I-NEXT: addi sp, sp, 32 640; RV64I-NEXT: .cfi_def_cfa_offset 0 641; RV64I-NEXT: ret 642; 643; RV64IA-LABEL: atomicrmw_uinc_wrap_i64: 644; RV64IA: # %bb.0: 645; RV64IA-NEXT: ld a2, 0(a0) 646; RV64IA-NEXT: .LBB3_1: # %atomicrmw.start 647; RV64IA-NEXT: # =>This Loop Header: Depth=1 648; RV64IA-NEXT: # Child Loop BB3_3 Depth 2 649; RV64IA-NEXT: mv a3, a2 650; RV64IA-NEXT: addi a2, a2, 1 651; RV64IA-NEXT: sltu a4, a3, a1 652; RV64IA-NEXT: neg a4, a4 653; RV64IA-NEXT: and a4, a4, a2 654; RV64IA-NEXT: .LBB3_3: # %atomicrmw.start 655; RV64IA-NEXT: # Parent Loop BB3_1 Depth=1 656; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 657; 
RV64IA-NEXT: lr.d.aqrl a2, (a0) 658; RV64IA-NEXT: bne a2, a3, .LBB3_1 659; RV64IA-NEXT: # %bb.4: # %atomicrmw.start 660; RV64IA-NEXT: # in Loop: Header=BB3_3 Depth=2 661; RV64IA-NEXT: sc.d.rl a5, a4, (a0) 662; RV64IA-NEXT: bnez a5, .LBB3_3 663; RV64IA-NEXT: # %bb.5: # %atomicrmw.start 664; RV64IA-NEXT: # %bb.2: # %atomicrmw.end 665; RV64IA-NEXT: mv a0, a2 666; RV64IA-NEXT: ret 667 %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst 668 ret i64 %result 669} 670 671define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { 672; RV32I-LABEL: atomicrmw_udec_wrap_i8: 673; RV32I: # %bb.0: 674; RV32I-NEXT: addi sp, sp, -32 675; RV32I-NEXT: .cfi_def_cfa_offset 32 676; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill 677; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill 678; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill 679; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill 680; RV32I-NEXT: .cfi_offset ra, -4 681; RV32I-NEXT: .cfi_offset s0, -8 682; RV32I-NEXT: .cfi_offset s1, -12 683; RV32I-NEXT: .cfi_offset s2, -16 684; RV32I-NEXT: mv s0, a0 685; RV32I-NEXT: lbu a3, 0(a0) 686; RV32I-NEXT: mv s1, a1 687; RV32I-NEXT: andi s2, a1, 255 688; RV32I-NEXT: j .LBB4_2 689; RV32I-NEXT: .LBB4_1: # %atomicrmw.start 690; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 691; RV32I-NEXT: sb a3, 15(sp) 692; RV32I-NEXT: addi a1, sp, 15 693; RV32I-NEXT: li a3, 5 694; RV32I-NEXT: li a4, 5 695; RV32I-NEXT: mv a0, s0 696; RV32I-NEXT: call __atomic_compare_exchange_1 697; RV32I-NEXT: lbu a3, 15(sp) 698; RV32I-NEXT: bnez a0, .LBB4_4 699; RV32I-NEXT: .LBB4_2: # %atomicrmw.start 700; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 701; RV32I-NEXT: andi a0, a3, 255 702; RV32I-NEXT: seqz a1, a0 703; RV32I-NEXT: sltu a0, s2, a0 704; RV32I-NEXT: or a0, a1, a0 705; RV32I-NEXT: mv a2, s1 706; RV32I-NEXT: bnez a0, .LBB4_1 707; RV32I-NEXT: # %bb.3: # %atomicrmw.start 708; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 709; RV32I-NEXT: addi a2, a3, -1 710; RV32I-NEXT: j .LBB4_1 711; RV32I-NEXT: .LBB4_4: # 
%atomicrmw.end 712; RV32I-NEXT: mv a0, a3 713; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload 714; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload 715; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload 716; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload 717; RV32I-NEXT: .cfi_restore ra 718; RV32I-NEXT: .cfi_restore s0 719; RV32I-NEXT: .cfi_restore s1 720; RV32I-NEXT: .cfi_restore s2 721; RV32I-NEXT: addi sp, sp, 32 722; RV32I-NEXT: .cfi_def_cfa_offset 0 723; RV32I-NEXT: ret 724; 725; RV32IA-LABEL: atomicrmw_udec_wrap_i8: 726; RV32IA: # %bb.0: 727; RV32IA-NEXT: andi a2, a0, -4 728; RV32IA-NEXT: slli a3, a0, 3 729; RV32IA-NEXT: li a4, 255 730; RV32IA-NEXT: andi a0, a3, 24 731; RV32IA-NEXT: lw a6, 0(a2) 732; RV32IA-NEXT: sll a3, a4, a3 733; RV32IA-NEXT: not a3, a3 734; RV32IA-NEXT: andi a4, a1, 255 735; RV32IA-NEXT: j .LBB4_2 736; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start 737; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 738; RV32IA-NEXT: andi a6, a7, 255 739; RV32IA-NEXT: sll a6, a6, a0 740; RV32IA-NEXT: and a7, a5, a3 741; RV32IA-NEXT: or a7, a7, a6 742; RV32IA-NEXT: .LBB4_5: # %atomicrmw.start 743; RV32IA-NEXT: # Parent Loop BB4_2 Depth=1 744; RV32IA-NEXT: # => This Inner Loop Header: Depth=2 745; RV32IA-NEXT: lr.w.aqrl a6, (a2) 746; RV32IA-NEXT: bne a6, a5, .LBB4_7 747; RV32IA-NEXT: # %bb.6: # %atomicrmw.start 748; RV32IA-NEXT: # in Loop: Header=BB4_5 Depth=2 749; RV32IA-NEXT: sc.w.rl t0, a7, (a2) 750; RV32IA-NEXT: bnez t0, .LBB4_5 751; RV32IA-NEXT: .LBB4_7: # %atomicrmw.start 752; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 753; RV32IA-NEXT: beq a6, a5, .LBB4_4 754; RV32IA-NEXT: .LBB4_2: # %atomicrmw.start 755; RV32IA-NEXT: # =>This Loop Header: Depth=1 756; RV32IA-NEXT: # Child Loop BB4_5 Depth 2 757; RV32IA-NEXT: mv a5, a6 758; RV32IA-NEXT: srl a6, a6, a0 759; RV32IA-NEXT: andi a7, a6, 255 760; RV32IA-NEXT: seqz t0, a7 761; RV32IA-NEXT: sltu a7, a4, a7 762; RV32IA-NEXT: or t0, t0, a7 763; RV32IA-NEXT: mv a7, a1 764; RV32IA-NEXT: bnez t0, .LBB4_1 765; 
RV32IA-NEXT: # %bb.3: # %atomicrmw.start 766; RV32IA-NEXT: # in Loop: Header=BB4_2 Depth=1 767; RV32IA-NEXT: addi a7, a6, -1 768; RV32IA-NEXT: j .LBB4_1 769; RV32IA-NEXT: .LBB4_4: # %atomicrmw.end 770; RV32IA-NEXT: srl a0, a6, a0 771; RV32IA-NEXT: ret 772; 773; RV64I-LABEL: atomicrmw_udec_wrap_i8: 774; RV64I: # %bb.0: 775; RV64I-NEXT: addi sp, sp, -48 776; RV64I-NEXT: .cfi_def_cfa_offset 48 777; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill 778; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill 779; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill 780; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill 781; RV64I-NEXT: .cfi_offset ra, -8 782; RV64I-NEXT: .cfi_offset s0, -16 783; RV64I-NEXT: .cfi_offset s1, -24 784; RV64I-NEXT: .cfi_offset s2, -32 785; RV64I-NEXT: mv s0, a0 786; RV64I-NEXT: lbu a3, 0(a0) 787; RV64I-NEXT: mv s1, a1 788; RV64I-NEXT: andi s2, a1, 255 789; RV64I-NEXT: j .LBB4_2 790; RV64I-NEXT: .LBB4_1: # %atomicrmw.start 791; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 792; RV64I-NEXT: sb a3, 15(sp) 793; RV64I-NEXT: addi a1, sp, 15 794; RV64I-NEXT: li a3, 5 795; RV64I-NEXT: li a4, 5 796; RV64I-NEXT: mv a0, s0 797; RV64I-NEXT: call __atomic_compare_exchange_1 798; RV64I-NEXT: lbu a3, 15(sp) 799; RV64I-NEXT: bnez a0, .LBB4_4 800; RV64I-NEXT: .LBB4_2: # %atomicrmw.start 801; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 802; RV64I-NEXT: andi a0, a3, 255 803; RV64I-NEXT: seqz a1, a0 804; RV64I-NEXT: sltu a0, s2, a0 805; RV64I-NEXT: or a0, a1, a0 806; RV64I-NEXT: mv a2, s1 807; RV64I-NEXT: bnez a0, .LBB4_1 808; RV64I-NEXT: # %bb.3: # %atomicrmw.start 809; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 810; RV64I-NEXT: addi a2, a3, -1 811; RV64I-NEXT: j .LBB4_1 812; RV64I-NEXT: .LBB4_4: # %atomicrmw.end 813; RV64I-NEXT: mv a0, a3 814; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload 815; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload 816; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload 817; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload 818; RV64I-NEXT: 
.cfi_restore ra 819; RV64I-NEXT: .cfi_restore s0 820; RV64I-NEXT: .cfi_restore s1 821; RV64I-NEXT: .cfi_restore s2 822; RV64I-NEXT: addi sp, sp, 48 823; RV64I-NEXT: .cfi_def_cfa_offset 0 824; RV64I-NEXT: ret 825; 826; RV64IA-LABEL: atomicrmw_udec_wrap_i8: 827; RV64IA: # %bb.0: 828; RV64IA-NEXT: andi a2, a0, -4 829; RV64IA-NEXT: slli a4, a0, 3 830; RV64IA-NEXT: li a5, 255 831; RV64IA-NEXT: andi a0, a4, 24 832; RV64IA-NEXT: lw a3, 0(a2) 833; RV64IA-NEXT: sllw a4, a5, a4 834; RV64IA-NEXT: not a4, a4 835; RV64IA-NEXT: andi a5, a1, 255 836; RV64IA-NEXT: j .LBB4_2 837; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start 838; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 839; RV64IA-NEXT: sext.w a6, a3 840; RV64IA-NEXT: andi a7, a7, 255 841; RV64IA-NEXT: sllw a7, a7, a0 842; RV64IA-NEXT: and a3, a3, a4 843; RV64IA-NEXT: or a7, a3, a7 844; RV64IA-NEXT: .LBB4_5: # %atomicrmw.start 845; RV64IA-NEXT: # Parent Loop BB4_2 Depth=1 846; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 847; RV64IA-NEXT: lr.w.aqrl a3, (a2) 848; RV64IA-NEXT: bne a3, a6, .LBB4_7 849; RV64IA-NEXT: # %bb.6: # %atomicrmw.start 850; RV64IA-NEXT: # in Loop: Header=BB4_5 Depth=2 851; RV64IA-NEXT: sc.w.rl t0, a7, (a2) 852; RV64IA-NEXT: bnez t0, .LBB4_5 853; RV64IA-NEXT: .LBB4_7: # %atomicrmw.start 854; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 855; RV64IA-NEXT: beq a3, a6, .LBB4_4 856; RV64IA-NEXT: .LBB4_2: # %atomicrmw.start 857; RV64IA-NEXT: # =>This Loop Header: Depth=1 858; RV64IA-NEXT: # Child Loop BB4_5 Depth 2 859; RV64IA-NEXT: srlw a6, a3, a0 860; RV64IA-NEXT: andi a7, a6, 255 861; RV64IA-NEXT: seqz t0, a7 862; RV64IA-NEXT: sltu a7, a5, a7 863; RV64IA-NEXT: or t0, t0, a7 864; RV64IA-NEXT: mv a7, a1 865; RV64IA-NEXT: bnez t0, .LBB4_1 866; RV64IA-NEXT: # %bb.3: # %atomicrmw.start 867; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 868; RV64IA-NEXT: addi a7, a6, -1 869; RV64IA-NEXT: j .LBB4_1 870; RV64IA-NEXT: .LBB4_4: # %atomicrmw.end 871; RV64IA-NEXT: srlw a0, a3, a0 872; RV64IA-NEXT: ret 873 %result = 
atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
  ret i8 %result
}

; Unsigned decrement-with-wrap on an i16 with seq_cst ordering.
; In every expected sequence below, the value to store is %val when the loaded
; halfword is zero or unsigned-greater than %val, and (loaded - 1) otherwise.
; Without +a the update is retried through __atomic_compare_exchange_2; with +a
; it is an lr.w.aqrl/sc.w.rl loop on the aligned word that contains the
; halfword (address masked with -4, value shifted and masked into place).
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; RV32I-LABEL: atomicrmw_udec_wrap_i16:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: .cfi_offset s3, -20
; RV32I-NEXT: mv s0, a1
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lhu a1, 0(a0)
; RV32I-NEXT: lui s2, 16
; RV32I-NEXT: addi s2, s2, -1
; RV32I-NEXT: and s3, s0, s2
; RV32I-NEXT: j .LBB5_2
; RV32I-NEXT: .LBB5_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32I-NEXT: sh a1, 10(sp)
; RV32I-NEXT: addi a1, sp, 10
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_2
; RV32I-NEXT: lh a1, 10(sp)
; RV32I-NEXT: bnez a0, .LBB5_4
; RV32I-NEXT: .LBB5_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: and a0, a1, s2
; RV32I-NEXT: seqz a2, a0
; RV32I-NEXT: sltu a0, s3, a0
; RV32I-NEXT: or a0, a2, a0
; RV32I-NEXT: mv a2, s0
; RV32I-NEXT: bnez a0, .LBB5_1
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: j .LBB5_1
; RV32I-NEXT: .LBB5_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: .cfi_restore s3
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i16:
; RV32IA: # %bb.0:
; RV32IA-NEXT: andi a2, a0, -4
; RV32IA-NEXT: slli a4, a0, 3
; RV32IA-NEXT: lui a3, 16
; RV32IA-NEXT: andi a0, a4, 24
; RV32IA-NEXT: addi a3, a3, -1
; RV32IA-NEXT: lw a7, 0(a2)
; RV32IA-NEXT: sll a4, a3, a4
; RV32IA-NEXT: not a4, a4
; RV32IA-NEXT: and a5, a1, a3
; RV32IA-NEXT: j .LBB5_2
; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT: and a7, t0, a3
; RV32IA-NEXT: sll a7, a7, a0
; RV32IA-NEXT: and t0, a6, a4
; RV32IA-NEXT: or t0, t0, a7
; RV32IA-NEXT: .LBB5_5: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB5_2 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a7, (a2)
; RV32IA-NEXT: bne a7, a6, .LBB5_7
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB5_5 Depth=2
; RV32IA-NEXT: sc.w.rl t1, t0, (a2)
; RV32IA-NEXT: bnez t1, .LBB5_5
; RV32IA-NEXT: .LBB5_7: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT: beq a7, a6, .LBB5_4
; RV32IA-NEXT: .LBB5_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB5_5 Depth 2
; RV32IA-NEXT: mv a6, a7
; RV32IA-NEXT: srl a7, a7, a0
; RV32IA-NEXT: and t0, a7, a3
; RV32IA-NEXT: seqz t1, t0
; RV32IA-NEXT: sltu t0, a5, t0
; RV32IA-NEXT: or t1, t1, t0
; RV32IA-NEXT: mv t0, a1
; RV32IA-NEXT: bnez t1, .LBB5_1
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV32IA-NEXT: addi t0, a7, -1
; RV32IA-NEXT: j .LBB5_1
; RV32IA-NEXT: .LBB5_4: # %atomicrmw.end
; RV32IA-NEXT: srl a0, a7, a0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i16:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: .cfi_offset s3, -40
; RV64I-NEXT: mv s0, a1
; RV64I-NEXT: mv s1, a0
; RV64I-NEXT: lhu a1, 0(a0)
; RV64I-NEXT: lui s2, 16
; RV64I-NEXT: addiw s2, s2, -1
; RV64I-NEXT: and s3, s0, s2
; RV64I-NEXT: j .LBB5_2
; RV64I-NEXT: .LBB5_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64I-NEXT: sh a1, 6(sp)
; RV64I-NEXT: addi a1, sp, 6
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __atomic_compare_exchange_2
; RV64I-NEXT: lh a1, 6(sp)
; RV64I-NEXT: bnez a0, .LBB5_4
; RV64I-NEXT: .LBB5_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: and a0, a1, s2
; RV64I-NEXT: seqz a2, a0
; RV64I-NEXT: sltu a0, s3, a0
; RV64I-NEXT: or a0, a2, a0
; RV64I-NEXT: mv a2, s0
; RV64I-NEXT: bnez a0, .LBB5_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64I-NEXT: addi a2, a1, -1
; RV64I-NEXT: j .LBB5_1
; RV64I-NEXT: .LBB5_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a1
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: .cfi_restore s3
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i16:
; RV64IA: # %bb.0:
; RV64IA-NEXT: andi a2, a0, -4
; RV64IA-NEXT: slli a5, a0, 3
; RV64IA-NEXT: lui a3, 16
; RV64IA-NEXT: andi a0, a5, 24
; RV64IA-NEXT: addiw a3, a3, -1
; RV64IA-NEXT: lw a4, 0(a2)
; RV64IA-NEXT: sllw a5, a3, a5
; RV64IA-NEXT: not a5, a5
; RV64IA-NEXT: and a6, a1, a3
; RV64IA-NEXT: j .LBB5_2
; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT: sext.w a7, a4
; RV64IA-NEXT: and t0, t0, a3
; RV64IA-NEXT: sllw t0, t0, a0
; RV64IA-NEXT: and a4, a4, a5
; RV64IA-NEXT: or t0, a4, t0
; RV64IA-NEXT: .LBB5_5: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB5_2 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a4, (a2)
; RV64IA-NEXT: bne a4, a7, .LBB5_7
; RV64IA-NEXT: # %bb.6: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB5_5 Depth=2
; RV64IA-NEXT: sc.w.rl t1, t0, (a2)
; RV64IA-NEXT: bnez t1, .LBB5_5
; RV64IA-NEXT: .LBB5_7: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT: beq a4, a7, .LBB5_4
; RV64IA-NEXT: .LBB5_2: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB5_5 Depth 2
; RV64IA-NEXT: srlw a7, a4, a0
; RV64IA-NEXT: and t0, a7, a3
; RV64IA-NEXT: seqz t1, t0
; RV64IA-NEXT: sltu t0, a6, t0
; RV64IA-NEXT: or t1, t1, t0
; RV64IA-NEXT: mv t0, a1
; RV64IA-NEXT: bnez t1, .LBB5_1
; RV64IA-NEXT: # %bb.3: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1
; RV64IA-NEXT: addi t0, a7, -1
; RV64IA-NEXT: j .LBB5_1
; RV64IA-NEXT: .LBB5_4: # %atomicrmw.end
; RV64IA-NEXT: srlw a0, a4, a0
; RV64IA-NEXT: ret
  %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
  ret i16 %result
}

; Unsigned decrement-with-wrap on an i32 with seq_cst ordering.
; Without +a the expected code retries through __atomic_compare_exchange_4;
; with +a it is an lr.w.aqrl/sc.w.rl loop directly on the addressed word.
; The rv64 sequences additionally sign-extend the operands with sext.w before
; comparing and use addiw for the decrement.
define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; RV32I-LABEL: atomicrmw_udec_wrap_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: .cfi_def_cfa_offset 16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: mv s1, a1
; RV32I-NEXT: j .LBB6_2
; RV32I-NEXT: .LBB6_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV32I-NEXT: sw a3, 0(sp)
; RV32I-NEXT: mv a1, sp
; RV32I-NEXT: li a3, 5
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: mv a0, s0
; RV32I-NEXT: call __atomic_compare_exchange_4
; RV32I-NEXT: lw a3, 0(sp)
; RV32I-NEXT: bnez a0, .LBB6_4
; RV32I-NEXT: .LBB6_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: seqz a0, a3
; RV32I-NEXT: sltu a1, s1, a3
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: mv a2, s1
; RV32I-NEXT: bnez a0, .LBB6_1
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV32I-NEXT: addi a2, a3, -1
; RV32I-NEXT: j .LBB6_1
; RV32I-NEXT: .LBB6_4: # %atomicrmw.end
; RV32I-NEXT: mv a0, a3
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i32:
; RV32IA: # %bb.0:
; RV32IA-NEXT: lw a2, 0(a0)
; RV32IA-NEXT: j .LBB6_2
; RV32IA-NEXT: .LBB6_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT: .LBB6_5: # %atomicrmw.start
; RV32IA-NEXT: # Parent Loop BB6_2 Depth=1
; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
; RV32IA-NEXT: lr.w.aqrl a2, (a0)
; RV32IA-NEXT: bne a2, a3, .LBB6_7
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB6_5 Depth=2
; RV32IA-NEXT: sc.w.rl a5, a4, (a0)
; RV32IA-NEXT: bnez a5, .LBB6_5
; RV32IA-NEXT: .LBB6_7: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT: beq a2, a3, .LBB6_4
; RV32IA-NEXT: .LBB6_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Loop Header: Depth=1
; RV32IA-NEXT: # Child Loop BB6_5 Depth 2
; RV32IA-NEXT: mv a3, a2
; RV32IA-NEXT: seqz a2, a2
; RV32IA-NEXT: sltu a4, a1, a3
; RV32IA-NEXT: or a2, a2, a4
; RV32IA-NEXT: mv a4, a1
; RV32IA-NEXT: bnez a2, .LBB6_1
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV32IA-NEXT: addi a4, a3, -1
; RV32IA-NEXT: j .LBB6_1
; RV32IA-NEXT: .LBB6_4: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a2
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: .cfi_def_cfa_offset 48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: .cfi_offset s2, -32
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: lw a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: sext.w s2, a1
; RV64I-NEXT: j .LBB6_2
; RV64I-NEXT: .LBB6_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV64I-NEXT: sw a3, 12(sp)
; RV64I-NEXT: addi a1, sp, 12
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_4
; RV64I-NEXT: lw a3, 12(sp)
; RV64I-NEXT: bnez a0, .LBB6_4
; RV64I-NEXT: .LBB6_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: seqz a0, a3
; RV64I-NEXT: sltu a1, s2, a3
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: bnez a0, .LBB6_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV64I-NEXT: addiw a2, a3, -1
; RV64I-NEXT: j .LBB6_1
; RV64I-NEXT: .LBB6_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: .cfi_restore s2
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i32:
; RV64IA: # %bb.0:
; RV64IA-NEXT: lw a2, 0(a0)
; RV64IA-NEXT: sext.w a3, a1
; RV64IA-NEXT: j .LBB6_2
; RV64IA-NEXT: .LBB6_1: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT: .LBB6_5: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB6_2 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.w.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a4, .LBB6_7
; RV64IA-NEXT: # %bb.6: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB6_5 Depth=2
; RV64IA-NEXT: sc.w.rl a6, a5, (a0)
; RV64IA-NEXT: bnez a6, .LBB6_5
; RV64IA-NEXT: .LBB6_7: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT: beq a2, a4, .LBB6_4
; RV64IA-NEXT: .LBB6_2: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB6_5 Depth 2
; RV64IA-NEXT: sext.w a4, a2
; RV64IA-NEXT: seqz a5, a4
; RV64IA-NEXT: sltu a6, a3, a4
; RV64IA-NEXT: or a6, a5, a6
; RV64IA-NEXT: mv a5, a1
; RV64IA-NEXT: bnez a6, .LBB6_1
; RV64IA-NEXT: # %bb.3: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1
; RV64IA-NEXT: addiw a5, a2, -1
; RV64IA-NEXT: j .LBB6_1
; RV64IA-NEXT: .LBB6_4: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
  %result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
  ret i32 %result
}

; Unsigned decrement-with-wrap on an i64 with seq_cst ordering.
; Both rv32 configurations (with and without +a) are expected to call
; __atomic_compare_exchange_8, comparing the value as a high/low register pair
; and borrowing into the high word on decrement (seqz + sub).
; On rv64 the expected code calls __atomic_compare_exchange_8 without +a and
; uses an lr.d.aqrl/sc.d.rl loop with +a.
define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; RV32I-LABEL: atomicrmw_udec_wrap_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -32
; RV32I-NEXT: .cfi_def_cfa_offset 32
; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT: .cfi_offset ra, -4
; RV32I-NEXT: .cfi_offset s0, -8
; RV32I-NEXT: .cfi_offset s1, -12
; RV32I-NEXT: .cfi_offset s2, -16
; RV32I-NEXT: mv s0, a2
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lw a5, 0(a0)
; RV32I-NEXT: lw a4, 4(a0)
; RV32I-NEXT: mv s2, a1
; RV32I-NEXT: j .LBB7_2
; RV32I-NEXT: .LBB7_1: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: sw a5, 8(sp)
; RV32I-NEXT: sw a4, 12(sp)
; RV32I-NEXT: addi a1, sp, 8
; RV32I-NEXT: li a4, 5
; RV32I-NEXT: li a5, 5
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __atomic_compare_exchange_8
; RV32I-NEXT: lw a5, 8(sp)
; RV32I-NEXT: lw a4, 12(sp)
; RV32I-NEXT: bnez a0, .LBB7_7
; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: beq a4, s0, .LBB7_4
; RV32I-NEXT: # %bb.3: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: sltu a0, s0, a4
; RV32I-NEXT: j .LBB7_5
; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: sltu a0, s2, a5
; RV32I-NEXT: .LBB7_5: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: or a1, a5, a4
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: mv a2, s2
; RV32I-NEXT: mv a3, s0
; RV32I-NEXT: bnez a0, .LBB7_1
; RV32I-NEXT: # %bb.6: # %atomicrmw.start
; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32I-NEXT: seqz a0, a5
; RV32I-NEXT: sub a3, a4, a0
; RV32I-NEXT: addi a2, a5, -1
; RV32I-NEXT: j .LBB7_1
; RV32I-NEXT: .LBB7_7: # %atomicrmw.end
; RV32I-NEXT: mv a0, a5
; RV32I-NEXT: mv a1, a4
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT: .cfi_restore ra
; RV32I-NEXT: .cfi_restore s0
; RV32I-NEXT: .cfi_restore s1
; RV32I-NEXT: .cfi_restore s2
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: .cfi_def_cfa_offset 0
; RV32I-NEXT: ret
;
; RV32IA-LABEL: atomicrmw_udec_wrap_i64:
; RV32IA: # %bb.0:
; RV32IA-NEXT: addi sp, sp, -32
; RV32IA-NEXT: .cfi_def_cfa_offset 32
; RV32IA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
; RV32IA-NEXT: .cfi_offset ra, -4
; RV32IA-NEXT: .cfi_offset s0, -8
; RV32IA-NEXT: .cfi_offset s1, -12
; RV32IA-NEXT: .cfi_offset s2, -16
; RV32IA-NEXT: mv s0, a2
; RV32IA-NEXT: mv s1, a0
; RV32IA-NEXT: lw a5, 0(a0)
; RV32IA-NEXT: lw a4, 4(a0)
; RV32IA-NEXT: mv s2, a1
; RV32IA-NEXT: j .LBB7_2
; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: sw a5, 8(sp)
; RV32IA-NEXT: sw a4, 12(sp)
; RV32IA-NEXT: addi a1, sp, 8
; RV32IA-NEXT: li a4, 5
; RV32IA-NEXT: li a5, 5
; RV32IA-NEXT: mv a0, s1
; RV32IA-NEXT: call __atomic_compare_exchange_8
; RV32IA-NEXT: lw a5, 8(sp)
; RV32IA-NEXT: lw a4, 12(sp)
; RV32IA-NEXT: bnez a0, .LBB7_7
; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
; RV32IA-NEXT: beq a4, s0, .LBB7_4
; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: sltu a0, s0, a4
; RV32IA-NEXT: j .LBB7_5
; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: sltu a0, s2, a5
; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: or a1, a5, a4
; RV32IA-NEXT: seqz a1, a1
; RV32IA-NEXT: or a0, a1, a0
; RV32IA-NEXT: mv a2, s2
; RV32IA-NEXT: mv a3, s0
; RV32IA-NEXT: bnez a0, .LBB7_1
; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV32IA-NEXT: seqz a0, a5
; RV32IA-NEXT: sub a3, a4, a0
; RV32IA-NEXT: addi a2, a5, -1
; RV32IA-NEXT: j .LBB7_1
; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end
; RV32IA-NEXT: mv a0, a5
; RV32IA-NEXT: mv a1, a4
; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
; RV32IA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
; RV32IA-NEXT: .cfi_restore ra
; RV32IA-NEXT: .cfi_restore s0
; RV32IA-NEXT: .cfi_restore s1
; RV32IA-NEXT: .cfi_restore s2
; RV32IA-NEXT: addi sp, sp, 32
; RV32IA-NEXT: .cfi_def_cfa_offset 0
; RV32IA-NEXT: ret
;
; RV64I-LABEL: atomicrmw_udec_wrap_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: .cfi_def_cfa_offset 32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: .cfi_offset ra, -8
; RV64I-NEXT: .cfi_offset s0, -16
; RV64I-NEXT: .cfi_offset s1, -24
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: ld a3, 0(a0)
; RV64I-NEXT: mv s1, a1
; RV64I-NEXT: j .LBB7_2
; RV64I-NEXT: .LBB7_1: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV64I-NEXT: sd a3, 0(sp)
; RV64I-NEXT: mv a1, sp
; RV64I-NEXT: li a3, 5
; RV64I-NEXT: li a4, 5
; RV64I-NEXT: mv a0, s0
; RV64I-NEXT: call __atomic_compare_exchange_8
; RV64I-NEXT: ld a3, 0(sp)
; RV64I-NEXT: bnez a0, .LBB7_4
; RV64I-NEXT: .LBB7_2: # %atomicrmw.start
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: seqz a0, a3
; RV64I-NEXT: sltu a1, s1, a3
; RV64I-NEXT: or a0, a0, a1
; RV64I-NEXT: mv a2, s1
; RV64I-NEXT: bnez a0, .LBB7_1
; RV64I-NEXT: # %bb.3: # %atomicrmw.start
; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV64I-NEXT: addi a2, a3, -1
; RV64I-NEXT: j .LBB7_1
; RV64I-NEXT: .LBB7_4: # %atomicrmw.end
; RV64I-NEXT: mv a0, a3
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: .cfi_restore ra
; RV64I-NEXT: .cfi_restore s0
; RV64I-NEXT: .cfi_restore s1
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: .cfi_def_cfa_offset 0
; RV64I-NEXT: ret
;
; RV64IA-LABEL: atomicrmw_udec_wrap_i64:
; RV64IA: # %bb.0:
; RV64IA-NEXT: ld a2, 0(a0)
; RV64IA-NEXT: j .LBB7_2
; RV64IA-NEXT: .LBB7_1: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT: .LBB7_5: # %atomicrmw.start
; RV64IA-NEXT: # Parent Loop BB7_2 Depth=1
; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
; RV64IA-NEXT: lr.d.aqrl a2, (a0)
; RV64IA-NEXT: bne a2, a3, .LBB7_7
; RV64IA-NEXT: # %bb.6: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB7_5 Depth=2
; RV64IA-NEXT: sc.d.rl a5, a4, (a0)
; RV64IA-NEXT: bnez a5, .LBB7_5
; RV64IA-NEXT: .LBB7_7: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT: beq a2, a3, .LBB7_4
; RV64IA-NEXT: .LBB7_2: # %atomicrmw.start
; RV64IA-NEXT: # =>This Loop Header: Depth=1
; RV64IA-NEXT: # Child Loop BB7_5 Depth 2
; RV64IA-NEXT: mv a3, a2
; RV64IA-NEXT: seqz a2, a2
; RV64IA-NEXT: sltu a4, a1, a3
; RV64IA-NEXT: or a2, a2, a4
; RV64IA-NEXT: mv a4, a1
; RV64IA-NEXT: bnez a2, .LBB7_1
; RV64IA-NEXT: # %bb.3: # %atomicrmw.start
; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1
; RV64IA-NEXT: addi a4, a3, -1
; RV64IA-NEXT: j .LBB7_1
; RV64IA-NEXT: .LBB7_4: # %atomicrmw.end
; RV64IA-NEXT: mv a0, a2
; RV64IA-NEXT: ret
  %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
  ret i64 %result
}