; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12

; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12

; Exercises the two f32 raw-buffer float min/max atomic intrinsics declared
; below. Every function is compiled twelve times by the run lines above: six
; subtargets without -global-isel and the same six with it. The two gfx1200
; invocations share the GFX12 prefix, so both must produce the same output for
; every function in this file.
;
; The assembly assertions are generated; regenerate them with the script named
; on the first line instead of editing them by hand.

declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)


; fmin from an amdgpu_kernel with the returned value left unused. The fourth
; operand and the trailing immarg are both 0; the expected atomic carries no
; glc / th modifier.
; NOTE(review): %vindex feeds the third intrinsic operand, which the expected
; output uses as an 'offen' VGPR offset rather than an index - the name looks
; like a misnomer; confirm against the intrinsic definition.
define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_min_noret_f32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; Same fmin call from an amdgpu_ps function, but the result is stored to an
; undef addrspace(1) pointer so the value is live; the expected atomic then
; carries glc (TH_ATOMIC_RETURN in the gfx1200 output).
define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; fmin from an amdgpu_kernel with fourth operand 4 and trailing immarg 2. The
; expected output uses 4 as the scalar offset and adds 'glc slc'
; (TH_ATOMIC_NT_RETURN in the gfx1200 output). The result is stored through the
; addrspace(3) pointer %out (ds_write_b32 / ds_store_b32 in the expected
; output).
define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b32 v1, v0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v1, s6
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, s14
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b32 v1, v0
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, s6
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b32 v1, v0
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, s6
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    ds_store_b32 v1, v0
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    v_mov_b32_e32 v1, s10
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ds_store_b32 v1, v0
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v1, s0
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b32 v1, v0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b32 v1, v0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; G_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b32 v1, v0
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b32 v1, v0
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    s_load_b32 s0, s[4:5], 0x3c
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    ds_store_b32 v1, v0
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(3) %out, align 8
  ret void
}

; fmax counterpart of raw_buffer_atomic_min_noret_f32: amdgpu_kernel, result
; unused, fourth operand and trailing immarg both 0.
define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_max_noret_f32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmax counterpart of raw_buffer_atomic_min_rtn_f32: amdgpu_ps, result stored
; to an undef addrspace(1) pointer so the returning form (glc /
; TH_ATOMIC_RETURN) is expected.
define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v[0:1], v0, off
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; fmax from an amdgpu_kernel with fourth operand 4 and trailing immarg 2
; ('4 offen glc slc' / TH_ATOMIC_NT_RETURN in the expected output). Unlike the
; fmin variant above, the result is stored through an addrspace(1) pointer
; %out (buffer_store / global_store in the expected output).
define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, ptr addrspace(1) %out) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s6
; SI-NEXT:    s_mov_b32 s1, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s0, s6
; GFX7-NEXT:    s_mov_b32 s1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX1100-NEXT:    s_endpgm
;
; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX12-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(1) %out, align 8
  ret void
}