; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; Not supported in gfx8 or gfx9
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; Code generation test for the llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32
; intrinsic on tahiti, hawaii, gfx1010, gfx1100 and gfx1200 (see the llc
; invocations above). Each test comes in a returning form ("ret", the atomic's
; result is returned) and, for the first four shapes, a non-returning form
; ("noret", the result is discarded). The per-target assertion blocks are
; generated output; regenerate them with the script named on the first line
; instead of editing them by hand.

; Returning form, voffset taken from a VGPR argument. %rsrc and %soffset are
; passed inreg and the final immarg is 0. The expected output carries glc
; (th:TH_ATOMIC_RETURN on gfx12) and waits on the load counter before returning.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %ret
}

; Returning form where the voffset operand is %voffset + 256. The expected
; output shows the constant carried as the instruction's immediate (offset:256)
; with no separate add instruction.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256 glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256 th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret float %ret
}

; Returning form with a constant 0 voffset. The expected output uses idxen only
; (no offen) and addresses through the single VGPR v1.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], s16 idxen glc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s16 idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret float %ret
}

; Returning form with the final immarg set to 2. The expected output adds slc
; next to glc on the older targets and uses th:TH_ATOMIC_NT_RETURN on gfx12.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen glc slc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret float %ret
}

; Non-returning form, voffset taken from a VGPR argument: %ret is deliberately
; left unused and the function returns void. The expected output drops glc,
; and on gfx10 and newer there is no wait after the atomic.
define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Non-returning form where the voffset operand is %voffset + 256; the expected
; output carries the constant as offset:256 and has no glc.
define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, no voffset
; (non-returning form with a constant 0 voffset; the expected output uses
; idxen only and addresses through the single VGPR v1).
define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v1, s[0:3], s16 idxen
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s16 idxen
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Non-returning form with the final immarg set to 2; the expected output
; carries slc without glc (th:TH_ATOMIC_NT on gfx12).
define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen slc
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_NT
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Test waterfall loop on resource
; %rsrc is not inreg here, so it arrives in v1-v4. The expected output wraps
; the atomic in a loop that readfirstlanes the four resource dwords into SGPRs,
; compares them against every lane, runs the atomic for the matching lanes, and
; repeats until exec is empty before restoring the saved exec mask.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT: v_readfirstlane_b32 s8, v1
; GFX6-NEXT: v_readfirstlane_b32 s9, v2
; GFX6-NEXT: v_readfirstlane_b32 s10, v3
; GFX6-NEXT: v_readfirstlane_b32 s11, v4
; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
; GFX6-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX6-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX6-NEXT: s_cbranch_execnz .LBB8_1
; GFX6-NEXT: ; %bb.2:
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b64 s[6:7], exec
; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: v_readfirstlane_b32 s8, v1
; GFX7-NEXT: v_readfirstlane_b32 s9, v2
; GFX7-NEXT: v_readfirstlane_b32 s10, v3
; GFX7-NEXT: v_readfirstlane_b32 s11, v4
; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
; GFX7-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX7-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX7-NEXT: s_cbranch_execnz .LBB8_1
; GFX7-NEXT: ; %bb.2:
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s5, exec_lo
; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_readfirstlane_b32 s8, v1
; GFX10-NEXT: v_readfirstlane_b32 s9, v2
; GFX10-NEXT: v_readfirstlane_b32 s10, v3
; GFX10-NEXT: v_readfirstlane_b32 s11, v4
; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4
; GFX10-NEXT: s_and_saveexec_b32 s4, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB8_1
; GFX10-NEXT: ; %bb.2:
; GFX10-NEXT: s_mov_b32 exec_lo, s5
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s2, exec_lo
; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-NEXT: v_readfirstlane_b32 s5, v2
; GFX11-NEXT: v_readfirstlane_b32 s6, v3
; GFX11-NEXT: v_readfirstlane_b32 s7, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
; GFX11-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s1, vcc_lo, s1
; GFX11-NEXT: s_and_saveexec_b32 s1, s1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 glc
; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s1
; GFX11-NEXT: s_cbranch_execnz .LBB8_1
; GFX11-NEXT: ; %bb.2:
; GFX11-NEXT: s_mov_b32 exec_lo, s2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s2, exec_lo
; GFX12-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: v_readfirstlane_b32 s4, v1
; GFX12-NEXT: v_readfirstlane_b32 s5, v2
; GFX12-NEXT: v_readfirstlane_b32 s6, v3
; GFX12-NEXT: v_readfirstlane_b32 s7, v4
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
; GFX12-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: s_and_b32 s1, vcc_lo, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_and_saveexec_b32 s1, s1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 th:TH_ATOMIC_RETURN
; GFX12-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s1
; GFX12-NEXT: s_cbranch_execnz .LBB8_1
; GFX12-NEXT: ; %bb.2:
; GFX12-NEXT: s_mov_b32 exec_lo, s2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret float %ret
}

; Test waterfall loop on soffset
; NOTE(review): the function name says "sgpr_rsrc", but %rsrc is not inreg in
; this signature, so both %rsrc (v1-v4) and %soffset (v7) arrive in VGPRs. The
; expected loop therefore readfirstlanes and compares the resource dwords as
; well as soffset, and ANDs all three comparisons together before executing the
; atomic for the matching lanes.
define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT: v_readfirstlane_b32 s8, v1
; GFX6-NEXT: v_readfirstlane_b32 s9, v2
; GFX6-NEXT: v_readfirstlane_b32 s10, v3
; GFX6-NEXT: v_readfirstlane_b32 s11, v4
; GFX6-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
; GFX6-NEXT: v_readfirstlane_b32 s12, v7
; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, s12, v7
; GFX6-NEXT: s_and_b64 s[4:5], s[4:5], vcc
; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
; GFX6-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX6-NEXT: ; implicit-def: $vgpr7
; GFX6-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX6-NEXT: s_cbranch_execnz .LBB9_1
; GFX6-NEXT: ; %bb.2:
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b64 s[6:7], exec
; GFX7-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: v_readfirstlane_b32 s8, v1
; GFX7-NEXT: v_readfirstlane_b32 s9, v2
; GFX7-NEXT: v_readfirstlane_b32 s10, v3
; GFX7-NEXT: v_readfirstlane_b32 s11, v4
; GFX7-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
; GFX7-NEXT: v_readfirstlane_b32 s12, v7
; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5]
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, s12, v7
; GFX7-NEXT: s_and_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
; GFX7-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX7-NEXT: ; implicit-def: $vgpr7
; GFX7-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX7-NEXT: s_cbranch_execnz .LBB9_1
; GFX7-NEXT: ; %bb.2:
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_mov_b32 s6, exec_lo
; GFX10-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_readfirstlane_b32 s8, v1
; GFX10-NEXT: v_readfirstlane_b32 s9, v2
; GFX10-NEXT: v_readfirstlane_b32 s10, v3
; GFX10-NEXT: v_readfirstlane_b32 s11, v4
; GFX10-NEXT: v_readfirstlane_b32 s7, v7
; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, s7, v7
; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4
; GFX10-NEXT: s_and_b32 s4, s4, s5
; GFX10-NEXT: s_and_saveexec_b32 s4, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s7 idxen offen offset:256 glc
; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX10-NEXT: ; implicit-def: $vgpr7
; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: s_cbranch_execnz .LBB9_1
; GFX10-NEXT: ; %bb.2:
; GFX10-NEXT: s_mov_b32 exec_lo, s6
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s2, exec_lo
; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: v_readfirstlane_b32 s4, v1
; GFX11-NEXT: v_readfirstlane_b32 s5, v2
; GFX11-NEXT: v_readfirstlane_b32 s6, v3
; GFX11-NEXT: v_readfirstlane_b32 s7, v4
; GFX11-NEXT: v_readfirstlane_b32 s3, v7
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s3, v7
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_and_b32 s0, s0, s1
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 glc
; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX11-NEXT: ; implicit-def: $vgpr7
; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX11-NEXT: s_cbranch_execnz .LBB9_1
; GFX11-NEXT: ; %bb.2:
; GFX11-NEXT: s_mov_b32 exec_lo, s2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s2, exec_lo
; GFX12-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: v_readfirstlane_b32 s4, v1
; GFX12-NEXT: v_readfirstlane_b32 s5, v2
; GFX12-NEXT: v_readfirstlane_b32 s6, v3
; GFX12-NEXT: v_readfirstlane_b32 s7, v4
; GFX12-NEXT: v_readfirstlane_b32 s3, v7
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-NEXT: v_cmp_eq_u32_e64 s1, s3, v7
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: s_and_b32 s0, s0, s1
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_and_saveexec_b32 s0, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 th:TH_ATOMIC_RETURN
; GFX12-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
; GFX12-NEXT: ; implicit-def: $vgpr7
; GFX12-NEXT: ; implicit-def: $vgpr5_vgpr6
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX12-NEXT: s_cbranch_execnz .LBB9_1
; GFX12-NEXT: ; %bb.2:
; GFX12-NEXT: s_mov_b32 exec_lo, s2
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_wait_alu 0xfffe
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret float %ret
}

; Intrinsic under test. As used by the callers above, the operands are, in
; order: the float value, the addrspace(8) buffer resource, vindex, voffset,
; soffset, and a trailing immediate (0 in most tests; 2 in the "_slc" tests).
declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)