; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030

; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030

; Codegen tests for the f64 raw.ptr buffer atomic fmin/fmax intrinsics.
; Every function below is compiled for verde, hawaii, gfx1010 and gfx1030,
; once with the default instruction selector and once with -global-isel
; (the check prefixes starting with G_).

declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)


; fmin, kernel entry point, result of the atomic is unused: the expected
; buffer_atomic_fmin_x2 has no glc modifier. Arguments are fetched with
; s_load and copied to VGPRs before the atomic.
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dword s8, s[4:5], 0xf
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    v_mov_b32_e32 v2, s8
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    v_mov_b32_e32 v2, s8
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    v_mov_b32_e32 v2, s8
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    v_mov_b32_e32 v2, s8
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dword s8, s[4:5], 0xf
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    v_mov_b32_e32 v2, s8
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x2
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmin, amdgpu_ps entry point, result is used (stored through an
; addrspace(3) pointer): the expected atomic carries glc and is followed by
; a vmcnt wait before the ds_write_b64.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v0, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; fmin, amdgpu_ps entry point, result stored to %out. The call passes 4 as
; its fourth operand and 2 as its final immarg; the expected instruction
; carries the 4 as its immediate offset and gains the slc modifier.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v3, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v3, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v3, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v3, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}

; fmax counterpart of raw_ptr_buffer_atomic_min_noret_f64: kernel entry
; point, result unused, expected buffer_atomic_fmax_x2 without glc.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dword s8, s[4:5], 0xf
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    v_mov_b32_e32 v2, s8
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    v_mov_b32_e32 v2, s8
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    v_mov_b32_e32 v2, s8
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x2
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    v_mov_b32_e32 v2, s8
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dword s8, s[4:5], 0xf
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    v_mov_b32_e32 v2, s8
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x2
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x2
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s8
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmax counterpart of raw_ptr_buffer_atomic_min_rtn_f64: amdgpu_ps entry
; point, result stored through an addrspace(3) pointer, expected atomic
; carries glc.
define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v0, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, ptr addrspace(3) undef
  ret void
}

; fmax with offset 4 and the slc modifier, result stored to %out. Unlike the
; fmin variant of this test, this one is an amdgpu_kernel, so all operands
; (including %out) arrive through a single s_load_dwordx8.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    v_mov_b32_e32 v2, s6
; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; SI-NEXT:    v_mov_b32_e32 v2, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b64 v2, v[0:1]
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    v_mov_b32_e32 v2, s6
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v2, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    v_mov_b32_e32 v2, s14
; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v2, s15
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v2, s7
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
; GFX1030-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    v_mov_b32_e32 v2, s6
; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_SI-NEXT:    v_mov_b32_e32 v2, s7
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b64 v2, v[0:1]
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    v_mov_b32_e32 v2, s7
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s14
; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v2, s15
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s6
; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s7
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
; G_GFX1030-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, ptr addrspace(3) %out, align 8
  ret void
}