; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12
;
; Codegen test for the llvm.amdgcn.raw.buffer.atomic.cond.sub.u32 and
; llvm.amdgcn.struct.buffer.atomic.cond.sub.u32 intrinsics on gfx1200.
; Each intrinsic is tested in three variants:
;   *_return           - the result is used, so the instruction must carry
;                        th:TH_ATOMIC_RETURN and be followed by s_wait_loadcnt.
;   *_no_return        - the result is unused; without the
;                        "+atomic-csub-no-rtn-insts" target feature the
;                        returning form is still selected (see attributes #0).
;   *_no_return_forced - the result is unused AND "+atomic-csub-no-rtn-insts"
;                        is enabled (attributes #1), so the non-returning form
;                        (no th:TH_ATOMIC_RETURN, no s_wait_loadcnt) is emitted.
; The *_imm_soff_* variants additionally pass soffset = 4, which is
; materialized into an SGPR (s_mov_b32 s4, 4).

; Result used: expects th:TH_ATOMIC_RETURN and a wait on the loaded value.
define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

; Result unused, but the no-rtn feature is off: the returning form is still used.
define void @raw_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; Result unused with "+atomic-csub-no-rtn-insts": no th:TH_ATOMIC_RETURN,
; and no s_wait_loadcnt since nothing is returned to wait for.
define void @raw_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; Non-zero soffset (4) is materialized into s4 and passed as the soffset operand.
define float @raw_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @raw_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  ret void
}

define void @raw_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  ret void
}

; Struct-buffer form: the vindex operand (0) goes in a VGPR and adds "idxen".
define float @struct_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @struct_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], null idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret void
}

define void @struct_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], null idxen
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret void
}

define float @struct_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, v1, s[0:3], s4 idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @struct_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], s4 idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  ret void
}

define void @struct_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], s4 idxen
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  ret void
}

; Intrinsic signatures: raw = (data, rsrc, offset, soffset, cachepolicy),
; struct = (data, rsrc, vindex, offset, soffset, cachepolicy).
declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) #0

; #1 adds the feature that permits selecting the non-returning instruction
; when the atomic's result is unused.
attributes #0 = { nounwind }
attributes #1 = { nounwind "target-features"="+atomic-csub-no-rtn-insts" }