1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A 3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX940 4 5declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) 6declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) 7declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg) 8declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg) 9declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) 10declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) 11declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg) 12declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg) 13declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) 14declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) 15declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) 16declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg) 17declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1) 18 19define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 20; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: 21; GFX90A: ; %bb.0: ; %main_body 22; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 23; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 24; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 25; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 26; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 27; GFX90A-NEXT: v_mov_b32_e32 v2, s8 28; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen 29; GFX90A-NEXT: s_endpgm 30; 31; GFX940-LABEL: raw_buffer_atomic_add_noret_f64: 32; GFX940: ; %bb.0: ; %main_body 33; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 34; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 35; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 36; GFX940-NEXT: s_waitcnt lgkmcnt(0) 37; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 38; GFX940-NEXT: v_mov_b32_e32 v2, s8 39; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen 40; GFX940-NEXT: s_endpgm 41main_body: 42 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 43 ret void 44} 45 46define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 47; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64: 48; GFX90A: ; %bb.0: ; %main_body 49; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc 50; GFX90A-NEXT: s_waitcnt vmcnt(0) 51; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 52; GFX90A-NEXT: s_endpgm 53; 54; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64: 55; GFX940: ; %bb.0: ; %main_body 56; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 57; GFX940-NEXT: s_waitcnt vmcnt(0) 58; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 59; GFX940-NEXT: s_endpgm 60main_body: 61 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 62 store double %ret, ptr undef 63 ret void 64} 65 66define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 67; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: 68; GFX90A: ; %bb.0: ; %main_body 69; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 70; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 71; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 72; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 73; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 74; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 75; GFX90A-NEXT: v_mov_b32_e32 v2, s10 76; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc 77; GFX90A-NEXT: v_mov_b32_e32 v2, 0 78; GFX90A-NEXT: s_waitcnt vmcnt(0) 79; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 80; GFX90A-NEXT: s_endpgm 81; 82; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: 83; GFX940: ; %bb.0: ; %main_body 84; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 85; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 86; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 87; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 88; GFX940-NEXT: s_waitcnt lgkmcnt(0) 89; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 90; GFX940-NEXT: v_mov_b32_e32 v2, s10 91; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 92; GFX940-NEXT: v_mov_b32_e32 v2, 0 93; GFX940-NEXT: s_waitcnt vmcnt(0) 94; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 95; GFX940-NEXT: s_endpgm 96main_body: 97 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) 98 store double %ret, ptr addrspace(1) %out, align 8 99 ret void 100} 101 102define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 103; GFX90A-LABEL: raw_ptr_buffer_atomic_add_noret_f64: 104; GFX90A: ; %bb.0: ; %main_body 105; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 106; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 107; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 108; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 109; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 110; GFX90A-NEXT: v_mov_b32_e32 v2, s8 111; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen 112; GFX90A-NEXT: s_endpgm 113; 114; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64: 115; GFX940: ; %bb.0: ; %main_body 116; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 117; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 118; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 119; GFX940-NEXT: s_waitcnt lgkmcnt(0) 120; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 121; GFX940-NEXT: v_mov_b32_e32 v2, s8 122; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen 123; GFX940-NEXT: s_endpgm 124main_body: 125 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 126 ret void 127} 128 129define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 130; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: 131; GFX90A: ; %bb.0: ; %main_body 132; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc 133; GFX90A-NEXT: s_waitcnt vmcnt(0) 134; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 135; GFX90A-NEXT: s_endpgm 136; 137; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64: 138; GFX940: ; %bb.0: ; %main_body 139; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0 140; GFX940-NEXT: s_waitcnt vmcnt(0) 141; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 142; GFX940-NEXT: s_endpgm 143main_body: 144 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 145 store double %ret, ptr undef 146 ret void 147} 148 149define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 150; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: 151; GFX90A: ; %bb.0: ; %main_body 152; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 153; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 154; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 155; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 156; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 157; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 158; GFX90A-NEXT: v_mov_b32_e32 v2, s10 159; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc 160; GFX90A-NEXT: v_mov_b32_e32 v2, 0 161; GFX90A-NEXT: s_waitcnt vmcnt(0) 162; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 163; GFX90A-NEXT: s_endpgm 164; 165; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc: 166; GFX940: ; %bb.0: ; %main_body 167; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 168; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 169; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 170; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 171; GFX940-NEXT: s_waitcnt lgkmcnt(0) 172; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 173; GFX940-NEXT: v_mov_b32_e32 v2, s10 174; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 175; GFX940-NEXT: v_mov_b32_e32 v2, 0 176; GFX940-NEXT: s_waitcnt vmcnt(0) 177; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 178; GFX940-NEXT: s_endpgm 179main_body: 180 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) 181 store double %ret, ptr addrspace(1) %out, align 8 182 ret void 183} 184 185define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 186; GFX90A-LABEL: struct_buffer_atomic_add_noret_f64: 187; GFX90A: ; %bb.0: ; %main_body 188; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 189; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 190; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 191; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 192; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 193; GFX90A-NEXT: v_mov_b32_e32 v2, s8 194; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen 195; GFX90A-NEXT: s_endpgm 196; 197; GFX940-LABEL: struct_buffer_atomic_add_noret_f64: 198; GFX940: ; %bb.0: ; %main_body 199; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 200; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 201; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 202; GFX940-NEXT: s_waitcnt lgkmcnt(0) 203; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 204; GFX940-NEXT: v_mov_b32_e32 v2, s8 205; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen 206; GFX940-NEXT: s_endpgm 207main_body: 208 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 209 ret void 210} 211 212define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 213; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64: 214; GFX90A: ; %bb.0: ; %main_body 215; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc 216; GFX90A-NEXT: s_waitcnt vmcnt(0) 217; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 218; GFX90A-NEXT: s_endpgm 219; 220; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64: 221; GFX940: ; %bb.0: ; %main_body 222; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 223; GFX940-NEXT: s_waitcnt vmcnt(0) 224; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 225; GFX940-NEXT: s_endpgm 226main_body: 227 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 228 store double %ret, ptr undef 229 ret void 230} 231 232define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 233; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: 234; GFX90A: ; %bb.0: ; %main_body 235; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 236; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 237; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 238; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 239; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 240; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 241; GFX90A-NEXT: v_mov_b32_e32 v2, s10 242; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 243; GFX90A-NEXT: v_mov_b32_e32 v2, 0 244; GFX90A-NEXT: s_waitcnt vmcnt(0) 245; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 246; GFX90A-NEXT: s_endpgm 247; 248; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: 249; GFX940: ; %bb.0: ; %main_body 250; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 251; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 252; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 253; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 254; GFX940-NEXT: s_waitcnt lgkmcnt(0) 255; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 256; GFX940-NEXT: v_mov_b32_e32 v2, s10 257; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 258; GFX940-NEXT: v_mov_b32_e32 v2, 0 259; GFX940-NEXT: s_waitcnt vmcnt(0) 260; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 261; GFX940-NEXT: s_endpgm 262main_body: 263 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 264 store double %ret, ptr addrspace(1) %out, align 8 265 ret void 266} 267 268define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 269; GFX90A-LABEL: struct_ptr_buffer_atomic_add_noret_f64: 270; GFX90A: ; %bb.0: ; %main_body 271; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 272; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 273; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 274; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 275; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 276; GFX90A-NEXT: v_mov_b32_e32 v2, s8 277; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen 278; GFX90A-NEXT: s_endpgm 279; 280; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64: 281; GFX940: ; %bb.0: ; %main_body 282; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 283; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 284; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 285; GFX940-NEXT: s_waitcnt lgkmcnt(0) 286; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 287; GFX940-NEXT: v_mov_b32_e32 v2, s8 288; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen 289; GFX940-NEXT: s_endpgm 290main_body: 291 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 292 ret void 293} 294 295define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 296; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: 297; GFX90A: ; %bb.0: ; %main_body 298; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc 299; GFX90A-NEXT: s_waitcnt vmcnt(0) 300; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 301; GFX90A-NEXT: s_endpgm 302; 303; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64: 304; GFX940: ; %bb.0: ; %main_body 305; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0 306; GFX940-NEXT: s_waitcnt vmcnt(0) 307; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 308; GFX940-NEXT: s_endpgm 309main_body: 310 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 311 store double %ret, ptr undef 312 ret void 313} 314 315define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 316; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: 317; GFX90A: ; %bb.0: ; %main_body 318; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 319; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 320; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 321; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 322; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 323; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 324; GFX90A-NEXT: v_mov_b32_e32 v2, s10 325; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 326; GFX90A-NEXT: v_mov_b32_e32 v2, 0 327; GFX90A-NEXT: s_waitcnt vmcnt(0) 328; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 329; GFX90A-NEXT: s_endpgm 330; 331; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc: 332; GFX940: ; %bb.0: ; %main_body 333; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 334; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 335; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 336; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 337; GFX940-NEXT: s_waitcnt lgkmcnt(0) 338; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 339; GFX940-NEXT: v_mov_b32_e32 v2, s10 340; GFX940-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 341; GFX940-NEXT: v_mov_b32_e32 v2, 0 342; GFX940-NEXT: s_waitcnt vmcnt(0) 343; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 344; GFX940-NEXT: s_endpgm 345main_body: 346 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 347 store double %ret, ptr addrspace(1) %out, align 8 348 ret void 349} 350 351define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 352; GFX90A-LABEL: raw_buffer_atomic_min_noret_f64: 353; GFX90A: ; %bb.0: ; %main_body 354; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 355; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 356; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 357; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 358; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 359; GFX90A-NEXT: v_mov_b32_e32 v2, s8 360; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen 361; GFX90A-NEXT: s_endpgm 362; 363; GFX940-LABEL: raw_buffer_atomic_min_noret_f64: 364; GFX940: ; %bb.0: ; %main_body 365; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 366; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 367; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 368; GFX940-NEXT: s_waitcnt lgkmcnt(0) 369; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 370; GFX940-NEXT: v_mov_b32_e32 v2, s8 371; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen 372; GFX940-NEXT: s_endpgm 373main_body: 374 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 375 ret void 376} 377 378define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 379; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64: 380; GFX90A: ; %bb.0: ; %main_body 381; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc 382; GFX90A-NEXT: s_waitcnt vmcnt(0) 383; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 384; GFX90A-NEXT: s_endpgm 385; 386; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64: 387; GFX940: ; %bb.0: ; %main_body 388; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 389; GFX940-NEXT: s_waitcnt vmcnt(0) 390; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 391; GFX940-NEXT: s_endpgm 392main_body: 393 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 394 store double %ret, ptr undef 395 ret void 396} 397 398define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 399; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: 400; GFX90A: ; %bb.0: ; %main_body 401; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 402; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 403; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 404; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 405; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 406; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 407; GFX90A-NEXT: v_mov_b32_e32 v2, s10 408; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc 409; GFX90A-NEXT: v_mov_b32_e32 v2, 0 410; GFX90A-NEXT: s_waitcnt vmcnt(0) 411; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 412; GFX90A-NEXT: s_endpgm 413; 414; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: 415; GFX940: ; %bb.0: ; %main_body 416; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 417; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 418; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 419; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 420; GFX940-NEXT: s_waitcnt lgkmcnt(0) 421; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 422; GFX940-NEXT: v_mov_b32_e32 v2, s10 423; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 424; GFX940-NEXT: v_mov_b32_e32 v2, 0 425; GFX940-NEXT: s_waitcnt vmcnt(0) 426; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 427; GFX940-NEXT: s_endpgm 428main_body: 429 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) 430 store double %ret, ptr addrspace(1) %out, align 8 431 ret void 432} 433 434define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 435; GFX90A-LABEL: raw_ptr_buffer_atomic_min_noret_f64: 436; GFX90A: ; %bb.0: ; %main_body 437; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 438; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 439; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 440; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 441; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 442; GFX90A-NEXT: v_mov_b32_e32 v2, s8 443; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen 444; GFX90A-NEXT: s_endpgm 445; 446; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64: 447; GFX940: ; %bb.0: ; %main_body 448; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 449; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 450; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 451; GFX940-NEXT: s_waitcnt lgkmcnt(0) 452; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 453; GFX940-NEXT: v_mov_b32_e32 v2, s8 454; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen 455; GFX940-NEXT: s_endpgm 456main_body: 457 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 458 ret void 459} 460 461define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 462; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: 463; GFX90A: ; %bb.0: ; %main_body 464; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc 465; GFX90A-NEXT: s_waitcnt vmcnt(0) 466; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 467; GFX90A-NEXT: s_endpgm 468; 469; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64: 470; GFX940: ; %bb.0: ; %main_body 471; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0 472; GFX940-NEXT: s_waitcnt vmcnt(0) 473; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 474; GFX940-NEXT: s_endpgm 475main_body: 476 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 477 store double %ret, ptr undef 478 ret void 479} 480 481define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 482; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: 483; GFX90A: ; %bb.0: ; %main_body 484; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 485; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 486; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 487; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 488; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 489; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 490; GFX90A-NEXT: v_mov_b32_e32 v2, s10 491; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc 492; GFX90A-NEXT: v_mov_b32_e32 v2, 0 493; GFX90A-NEXT: s_waitcnt vmcnt(0) 494; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 495; GFX90A-NEXT: s_endpgm 496; 497; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc: 498; GFX940: ; %bb.0: ; %main_body 499; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 500; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 501; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 502; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 503; GFX940-NEXT: s_waitcnt lgkmcnt(0) 504; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 505; GFX940-NEXT: v_mov_b32_e32 v2, s10 506; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 507; GFX940-NEXT: v_mov_b32_e32 v2, 0 508; GFX940-NEXT: s_waitcnt vmcnt(0) 509; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 510; GFX940-NEXT: s_endpgm 511main_body: 512 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) 513 store double %ret, ptr addrspace(1) %out, align 8 514 ret void 515} 516 517define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 518; GFX90A-LABEL: struct_buffer_atomic_min_noret_f64: 519; GFX90A: ; %bb.0: ; %main_body 520; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 521; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 522; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 523; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 524; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 525; GFX90A-NEXT: v_mov_b32_e32 v2, s8 526; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen 527; GFX90A-NEXT: s_endpgm 528; 529; GFX940-LABEL: struct_buffer_atomic_min_noret_f64: 530; GFX940: ; %bb.0: ; %main_body 531; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 532; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 533; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 534; GFX940-NEXT: s_waitcnt lgkmcnt(0) 535; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 536; GFX940-NEXT: v_mov_b32_e32 v2, s8 537; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen 538; GFX940-NEXT: s_endpgm 539main_body: 540 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 541 ret void 542} 543 544define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 545; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64: 546; GFX90A: ; %bb.0: ; %main_body 547; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc 548; GFX90A-NEXT: s_waitcnt vmcnt(0) 549; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 550; GFX90A-NEXT: s_endpgm 551; 552; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64: 553; GFX940: ; %bb.0: ; %main_body 554; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 555; GFX940-NEXT: s_waitcnt vmcnt(0) 556; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 557; GFX940-NEXT: s_endpgm 558main_body: 559 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 560 store double %ret, ptr undef 561 ret void 562} 563 564define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 565; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: 566; GFX90A: ; %bb.0: ; %main_body 567; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 568; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 569; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 570; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 571; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 572; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 573; GFX90A-NEXT: v_mov_b32_e32 v2, s10 574; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 575; GFX90A-NEXT: v_mov_b32_e32 v2, 0 576; GFX90A-NEXT: s_waitcnt vmcnt(0) 577; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 578; GFX90A-NEXT: s_endpgm 579; 580; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: 581; GFX940: ; %bb.0: ; %main_body 582; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 583; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 584; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 585; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 586; GFX940-NEXT: s_waitcnt lgkmcnt(0) 587; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 588; GFX940-NEXT: v_mov_b32_e32 v2, s10 589; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 590; GFX940-NEXT: v_mov_b32_e32 v2, 0 591; GFX940-NEXT: s_waitcnt vmcnt(0) 592; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 593; GFX940-NEXT: s_endpgm 594main_body: 595 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 596 store double %ret, ptr addrspace(1) %out, align 8 597 ret void 598} 599 600define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 601; GFX90A-LABEL: struct_ptr_buffer_atomic_min_noret_f64: 602; GFX90A: ; %bb.0: ; %main_body 603; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 604; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 605; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 606; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 607; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 608; GFX90A-NEXT: v_mov_b32_e32 v2, s8 609; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen 610; GFX90A-NEXT: s_endpgm 611; 612; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64: 613; GFX940: ; %bb.0: ; %main_body 614; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 615; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 616; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 617; GFX940-NEXT: s_waitcnt lgkmcnt(0) 618; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 619; GFX940-NEXT: v_mov_b32_e32 v2, s8 620; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen 621; GFX940-NEXT: s_endpgm 622main_body: 623 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 624 ret void 625} 626 627define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 628; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: 629; GFX90A: ; %bb.0: ; %main_body 630; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc 631; GFX90A-NEXT: s_waitcnt vmcnt(0) 632; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 633; GFX90A-NEXT: s_endpgm 634; 635; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64: 636; GFX940: ; %bb.0: ; %main_body 637; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0 638; GFX940-NEXT: s_waitcnt vmcnt(0) 639; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 640; GFX940-NEXT: s_endpgm 641main_body: 642 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 643 store double %ret, ptr undef 644 ret void 645} 646 647define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 648; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: 649; GFX90A: ; %bb.0: ; %main_body 650; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 651; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 652; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 653; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 654; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 655; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 656; GFX90A-NEXT: v_mov_b32_e32 v2, s10 657; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 658; GFX90A-NEXT: v_mov_b32_e32 v2, 0 659; GFX90A-NEXT: s_waitcnt vmcnt(0) 660; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 661; GFX90A-NEXT: s_endpgm 662; 663; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc: 664; GFX940: ; %bb.0: ; %main_body 665; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 666; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 667; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 668; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 669; GFX940-NEXT: s_waitcnt lgkmcnt(0) 670; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 671; GFX940-NEXT: v_mov_b32_e32 v2, s10 672; GFX940-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 673; GFX940-NEXT: v_mov_b32_e32 v2, 0 674; GFX940-NEXT: s_waitcnt vmcnt(0) 675; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 676; GFX940-NEXT: s_endpgm 677main_body: 678 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 679 store double %ret, ptr addrspace(1) %out, align 8 680 ret void 681} 682 683define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 684; GFX90A-LABEL: raw_buffer_atomic_max_noret_f64: 685; GFX90A: ; %bb.0: ; %main_body 686; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 687; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 688; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 689; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 690; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 691; GFX90A-NEXT: v_mov_b32_e32 v2, s8 692; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen 693; GFX90A-NEXT: s_endpgm 694; 695; GFX940-LABEL: raw_buffer_atomic_max_noret_f64: 696; GFX940: ; %bb.0: ; %main_body 697; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 698; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 699; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 700; GFX940-NEXT: s_waitcnt lgkmcnt(0) 701; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 702; GFX940-NEXT: v_mov_b32_e32 v2, s8 703; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen 704; GFX940-NEXT: s_endpgm 705main_body: 706 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 707 ret void 708} 709 710define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 711; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64: 712; GFX90A: ; %bb.0: ; %main_body 713; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc 714; GFX90A-NEXT: s_waitcnt vmcnt(0) 715; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 716; GFX90A-NEXT: s_endpgm 717; 718; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64: 719; GFX940: ; %bb.0: ; %main_body 720; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 721; GFX940-NEXT: s_waitcnt vmcnt(0) 722; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 723; GFX940-NEXT: s_endpgm 724main_body: 725 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) 726 store double %ret, ptr undef 727 ret void 728} 729 730define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 731; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: 732; GFX90A: ; %bb.0: ; %main_body 733; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 734; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 735; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 736; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 737; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 738; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 739; GFX90A-NEXT: v_mov_b32_e32 v2, s10 740; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc 741; GFX90A-NEXT: v_mov_b32_e32 v2, 0 742; GFX90A-NEXT: s_waitcnt vmcnt(0) 743; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 744; GFX90A-NEXT: s_endpgm 745; 746; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: 747; GFX940: ; %bb.0: ; %main_body 748; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 749; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 750; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 751; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 752; GFX940-NEXT: s_waitcnt lgkmcnt(0) 753; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 754; GFX940-NEXT: v_mov_b32_e32 v2, s10 755; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 756; GFX940-NEXT: v_mov_b32_e32 v2, 0 757; GFX940-NEXT: s_waitcnt vmcnt(0) 758; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 759; GFX940-NEXT: s_endpgm 760main_body: 761 %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) 762 store double %ret, ptr addrspace(1) %out, align 8 763 ret void 764} 765 766define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 767; GFX90A-LABEL: raw_ptr_buffer_atomic_max_noret_f64: 768; GFX90A: ; %bb.0: ; %main_body 769; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 770; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 771; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 772; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 773; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 774; GFX90A-NEXT: v_mov_b32_e32 v2, s8 775; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen 776; GFX90A-NEXT: s_endpgm 777; 778; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64: 779; GFX940: ; %bb.0: ; %main_body 780; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 781; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 782; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 783; GFX940-NEXT: s_waitcnt lgkmcnt(0) 784; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 785; GFX940-NEXT: v_mov_b32_e32 v2, s8 786; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen 787; GFX940-NEXT: s_endpgm 788main_body: 789 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 790 ret void 791} 792 793define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 794; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: 795; GFX90A: ; %bb.0: ; %main_body 796; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc 797; GFX90A-NEXT: s_waitcnt vmcnt(0) 798; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 799; GFX90A-NEXT: s_endpgm 800; 801; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64: 802; GFX940: ; %bb.0: ; %main_body 803; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0 804; GFX940-NEXT: s_waitcnt vmcnt(0) 805; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 806; GFX940-NEXT: s_endpgm 807main_body: 808 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0) 809 store double %ret, ptr undef 810 ret void 811} 812 813define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 814; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: 815; GFX90A: ; %bb.0: ; %main_body 816; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 817; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 818; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 819; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 820; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 821; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 822; GFX90A-NEXT: v_mov_b32_e32 v2, s10 823; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc 824; GFX90A-NEXT: v_mov_b32_e32 v2, 0 825; GFX90A-NEXT: s_waitcnt vmcnt(0) 826; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 827; GFX90A-NEXT: s_endpgm 828; 829; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc: 830; GFX940: ; %bb.0: ; %main_body 831; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 832; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 833; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 834; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 835; GFX940-NEXT: s_waitcnt lgkmcnt(0) 836; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 837; GFX940-NEXT: v_mov_b32_e32 v2, s10 838; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt 839; GFX940-NEXT: v_mov_b32_e32 v2, 0 840; GFX940-NEXT: s_waitcnt vmcnt(0) 841; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 842; GFX940-NEXT: s_endpgm 843main_body: 844 %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2) 845 store double %ret, ptr addrspace(1) %out, align 8 846 ret void 847} 848 849define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) { 850; GFX90A-LABEL: struct_buffer_atomic_max_noret_f64: 851; GFX90A: ; %bb.0: ; %main_body 852; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 853; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 854; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 855; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 856; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 857; GFX90A-NEXT: v_mov_b32_e32 v2, s8 858; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen 859; GFX90A-NEXT: s_endpgm 860; 861; GFX940-LABEL: struct_buffer_atomic_max_noret_f64: 862; GFX940: ; %bb.0: ; %main_body 863; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 864; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 865; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 866; GFX940-NEXT: s_waitcnt lgkmcnt(0) 867; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 868; GFX940-NEXT: v_mov_b32_e32 v2, s8 869; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen 870; GFX940-NEXT: s_endpgm 871main_body: 872 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 873 ret void 874} 875 876define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { 877; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64: 878; GFX90A: ; %bb.0: ; %main_body 879; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc 880; GFX90A-NEXT: s_waitcnt vmcnt(0) 881; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 882; GFX90A-NEXT: s_endpgm 883; 884; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64: 885; GFX940: ; %bb.0: ; %main_body 886; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 887; GFX940-NEXT: s_waitcnt vmcnt(0) 888; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 889; GFX940-NEXT: s_endpgm 890main_body: 891 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 892 store double %ret, ptr undef 893 ret void 894} 895 896define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 897; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: 898; GFX90A: ; %bb.0: ; %main_body 899; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 900; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 901; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 902; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 903; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 904; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 905; GFX90A-NEXT: v_mov_b32_e32 v2, s10 906; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 907; GFX90A-NEXT: v_mov_b32_e32 v2, 0 908; GFX90A-NEXT: s_waitcnt vmcnt(0) 909; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 910; GFX90A-NEXT: s_endpgm 911; 912; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: 913; GFX940: ; %bb.0: ; %main_body 914; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 915; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 916; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 917; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 918; GFX940-NEXT: s_waitcnt lgkmcnt(0) 919; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 920; GFX940-NEXT: v_mov_b32_e32 v2, s10 921; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 922; GFX940-NEXT: v_mov_b32_e32 v2, 0 923; GFX940-NEXT: s_waitcnt vmcnt(0) 924; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 925; GFX940-NEXT: s_endpgm 926main_body: 927 %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 928 store double %ret, ptr addrspace(1) %out, align 8 929 ret void 930} 931 932define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) { 933; GFX90A-LABEL: struct_ptr_buffer_atomic_max_noret_f64: 934; GFX90A: ; %bb.0: ; %main_body 935; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 936; GFX90A-NEXT: s_load_dword s8, s[4:5], 0x3c 937; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 938; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 939; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 940; GFX90A-NEXT: v_mov_b32_e32 v2, s8 941; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen 942; GFX90A-NEXT: s_endpgm 943; 944; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64: 945; GFX940: ; %bb.0: ; %main_body 946; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 947; GFX940-NEXT: s_load_dword s8, s[4:5], 0x3c 948; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 949; GFX940-NEXT: s_waitcnt lgkmcnt(0) 950; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 951; GFX940-NEXT: v_mov_b32_e32 v2, s8 952; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen 953; GFX940-NEXT: s_endpgm 954main_body: 955 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 956 ret void 957} 958 959define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) { 960; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: 961; GFX90A: ; %bb.0: ; %main_body 962; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc 963; GFX90A-NEXT: s_waitcnt vmcnt(0) 964; GFX90A-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 965; GFX90A-NEXT: s_endpgm 966; 967; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64: 968; GFX940: ; %bb.0: ; %main_body 969; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0 970; GFX940-NEXT: s_waitcnt vmcnt(0) 971; GFX940-NEXT: flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1 972; GFX940-NEXT: s_endpgm 973main_body: 974 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0) 975 store double %ret, ptr undef 976 ret void 977} 978 979define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { 980; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: 981; GFX90A: ; %bb.0: ; %main_body 982; GFX90A-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 983; GFX90A-NEXT: s_load_dword s10, s[4:5], 0x3c 984; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 985; GFX90A-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 986; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 987; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1] 988; GFX90A-NEXT: v_mov_b32_e32 v2, s10 989; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc 990; GFX90A-NEXT: v_mov_b32_e32 v2, 0 991; GFX90A-NEXT: s_waitcnt vmcnt(0) 992; GFX90A-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 993; GFX90A-NEXT: s_endpgm 994; 995; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc: 996; GFX940: ; %bb.0: ; %main_body 997; GFX940-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 998; GFX940-NEXT: s_load_dword s10, s[4:5], 0x3c 999; GFX940-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1000; GFX940-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x44 1001; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1002; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[6:7] 1003; GFX940-NEXT: v_mov_b32_e32 v2, s10 1004; GFX940-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt 1005; GFX940-NEXT: v_mov_b32_e32 v2, 0 1006; GFX940-NEXT: s_waitcnt vmcnt(0) 1007; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1 1008; GFX940-NEXT: s_endpgm 1009main_body: 1010 %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2) 1011 store double %ret, ptr addrspace(1) %out, align 8 1012 ret void 1013} 1014 1015define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { 1016; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: 1017; GFX90A: ; %bb.0: ; %main_body 1018; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1019; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1020; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1021; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1022; GFX90A-NEXT: buffer_wbl2 1023; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1024; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1025; GFX90A-NEXT: s_waitcnt vmcnt(0) 1026; GFX90A-NEXT: buffer_invl2 1027; GFX90A-NEXT: buffer_wbinvl1_vol 1028; GFX90A-NEXT: s_endpgm 1029; 1030; GFX940-LABEL: global_atomic_fadd_f64_noret_pat: 1031; GFX940: ; %bb.0: ; %main_body 1032; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1033; GFX940-NEXT: v_mov_b32_e32 v2, 0 1034; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1035; GFX940-NEXT: buffer_wbl2 sc0 sc1 1036; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1037; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 1038; GFX940-NEXT: s_waitcnt vmcnt(0) 1039; GFX940-NEXT: buffer_inv sc0 sc1 1040; GFX940-NEXT: s_endpgm 1041main_body: 1042 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 1043 ret void 1044} 1045 1046define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 { 1047; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent: 1048; GFX90A: ; %bb.0: ; %main_body 1049; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1050; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1051; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1052; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1053; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1054; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1055; GFX90A-NEXT: s_waitcnt vmcnt(0) 1056; GFX90A-NEXT: buffer_wbinvl1_vol 1057; GFX90A-NEXT: s_endpgm 1058; 1059; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent: 1060; GFX940: ; %bb.0: ; %main_body 1061; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1062; GFX940-NEXT: v_mov_b32_e32 v2, 0 1063; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1064; GFX940-NEXT: buffer_wbl2 sc1 1065; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1066; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1067; GFX940-NEXT: s_waitcnt vmcnt(0) 1068; GFX940-NEXT: buffer_inv sc1 1069; GFX940-NEXT: s_endpgm 1070main_body: 1071 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1072 ret void 1073} 1074 1075define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 { 1076; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system: 1077; GFX90A: ; %bb.0: ; %main_body 1078; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1079; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1080; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1081; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1082; GFX90A-NEXT: buffer_wbl2 1083; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1084; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1085; GFX90A-NEXT: s_waitcnt vmcnt(0) 1086; GFX90A-NEXT: buffer_invl2 1087; GFX90A-NEXT: buffer_wbinvl1_vol 1088; GFX90A-NEXT: s_endpgm 1089; 1090; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system: 1091; GFX940: ; %bb.0: ; %main_body 1092; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1093; GFX940-NEXT: v_mov_b32_e32 v2, 0 1094; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1095; GFX940-NEXT: buffer_wbl2 sc0 sc1 1096; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1097; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] sc1 1098; GFX940-NEXT: s_waitcnt vmcnt(0) 1099; GFX940-NEXT: buffer_inv sc0 sc1 1100; GFX940-NEXT: s_endpgm 1101main_body: 1102 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 1103 ret void 1104} 1105 1106define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 { 1107; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush: 1108; GFX90A: ; %bb.0: ; %main_body 1109; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1110; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1111; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1112; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1113; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1114; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1115; GFX90A-NEXT: s_waitcnt vmcnt(0) 1116; GFX90A-NEXT: buffer_wbinvl1_vol 1117; GFX90A-NEXT: s_endpgm 1118; 1119; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush: 1120; GFX940: ; %bb.0: ; %main_body 1121; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1122; GFX940-NEXT: v_mov_b32_e32 v2, 0 1123; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1124; GFX940-NEXT: buffer_wbl2 sc1 1125; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1126; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1127; GFX940-NEXT: s_waitcnt vmcnt(0) 1128; GFX940-NEXT: buffer_inv sc1 1129; GFX940-NEXT: s_endpgm 1130main_body: 1131 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1132 ret void 1133} 1134 1135define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 { 1136; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat: 1137; GFX90A: ; %bb.0: ; %main_body 1138; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1139; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1140; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1141; GFX90A-NEXT: buffer_wbl2 1142; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc 1143; GFX90A-NEXT: s_waitcnt vmcnt(0) 1144; GFX90A-NEXT: buffer_invl2 1145; GFX90A-NEXT: buffer_wbinvl1_vol 1146; GFX90A-NEXT: s_setpc_b64 s[30:31] 1147; 1148; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat: 1149; GFX940: ; %bb.0: ; %main_body 1150; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1151; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1152; GFX940-NEXT: buffer_wbl2 sc0 sc1 1153; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 1154; GFX940-NEXT: s_waitcnt vmcnt(0) 1155; GFX940-NEXT: buffer_inv sc0 sc1 1156; GFX940-NEXT: s_setpc_b64 s[30:31] 1157main_body: 1158 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 1159 ret double %ret 1160} 1161 1162define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 { 1163; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent: 1164; GFX90A: ; %bb.0: ; %main_body 1165; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1167; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1168; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc 1169; GFX90A-NEXT: s_waitcnt vmcnt(0) 1170; GFX90A-NEXT: buffer_wbinvl1_vol 1171; GFX90A-NEXT: s_setpc_b64 s[30:31] 1172; 1173; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent: 1174; GFX940: ; %bb.0: ; %main_body 1175; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1176; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1177; GFX940-NEXT: buffer_wbl2 sc1 1178; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 1179; GFX940-NEXT: s_waitcnt vmcnt(0) 1180; GFX940-NEXT: buffer_inv sc1 1181; GFX940-NEXT: s_setpc_b64 s[30:31] 1182main_body: 1183 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 1184 ret double %ret 1185} 1186 1187define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 { 1188; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system: 1189; GFX90A: ; %bb.0: ; %main_body 1190; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1191; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1192; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1193; GFX90A-NEXT: buffer_wbl2 1194; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc 1195; GFX90A-NEXT: s_waitcnt vmcnt(0) 1196; GFX90A-NEXT: buffer_invl2 1197; GFX90A-NEXT: buffer_wbinvl1_vol 1198; GFX90A-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system: 1201; GFX940: ; %bb.0: ; %main_body 1202; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1204; GFX940-NEXT: buffer_wbl2 sc0 sc1 1205; GFX940-NEXT: global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1 1206; GFX940-NEXT: s_waitcnt vmcnt(0) 1207; GFX940-NEXT: buffer_inv sc0 sc1 1208; GFX940-NEXT: s_setpc_b64 s[30:31] 1209main_body: 1210 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0 1211 ret double %ret 1212} 1213 1214define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { 1215; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: 1216; GFX90A: ; %bb.0: ; %main_body 1217; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1218; GFX90A-NEXT: s_mov_b64 s[2:3], 0 1219; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1220; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1221; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1222; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1223; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1] 1224; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start 1225; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 1226; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 1227; GFX90A-NEXT: global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc 1228; GFX90A-NEXT: s_waitcnt vmcnt(0) 1229; GFX90A-NEXT: buffer_wbinvl1_vol 1230; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 1231; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3] 1232; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] 1233; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3] 1234; GFX90A-NEXT: s_cbranch_execnz .LBB43_1 1235; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 1236; GFX90A-NEXT: s_endpgm 1237; 1238; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: 1239; GFX940: ; %bb.0: ; %main_body 1240; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1241; GFX940-NEXT: v_mov_b32_e32 v2, 0 1242; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1243; GFX940-NEXT: buffer_wbl2 sc1 1244; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1245; GFX940-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] 1246; GFX940-NEXT: s_waitcnt vmcnt(0) 1247; GFX940-NEXT: buffer_inv sc1 1248; GFX940-NEXT: s_endpgm 1249main_body: 1250 %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst 1251 ret void 1252} 1253 1254define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { 1255; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: 1256; GFX90A: ; %bb.0: ; %main_body 1257; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1258; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1259; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1260; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1261; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] 1262; GFX90A-NEXT: buffer_wbl2 1263; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] 1264; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1265; GFX90A-NEXT: buffer_invl2 1266; GFX90A-NEXT: buffer_wbinvl1_vol 1267; GFX90A-NEXT: s_endpgm 1268; 1269; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat: 1270; GFX940: ; %bb.0: ; %main_body 1271; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1272; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1273; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1274; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] 1275; GFX940-NEXT: buffer_wbl2 sc0 sc1 1276; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 1277; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1278; GFX940-NEXT: buffer_inv sc0 sc1 1279; GFX940-NEXT: s_endpgm 1280main_body: 1281 %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1282 ret void 1283} 1284 1285define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { 1286; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: 1287; GFX90A: ; %bb.0: ; %main_body 1288; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1289; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1290; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1291; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1292; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] 1293; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] 1294; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1295; GFX90A-NEXT: buffer_wbinvl1_vol 1296; GFX90A-NEXT: s_endpgm 1297; 1298; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent: 1299; GFX940: ; %bb.0: ; %main_body 1300; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1301; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1302; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1303; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] 1304; GFX940-NEXT: buffer_wbl2 sc1 1305; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] 1306; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1307; GFX940-NEXT: buffer_inv sc1 1308; GFX940-NEXT: s_endpgm 1309main_body: 1310 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1311 ret void 1312} 1313 1314define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { 1315; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: 1316; GFX90A: ; %bb.0: ; %main_body 1317; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1318; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1319; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1320; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1321; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1] 1322; GFX90A-NEXT: buffer_wbl2 1323; GFX90A-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] 1324; GFX90A-NEXT: s_waitcnt vmcnt(0) 1325; GFX90A-NEXT: buffer_invl2 1326; GFX90A-NEXT: buffer_wbinvl1_vol 1327; GFX90A-NEXT: s_endpgm 1328; 1329; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system: 1330; GFX940: ; %bb.0: ; %main_body 1331; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1332; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1333; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1334; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] 1335; GFX940-NEXT: buffer_wbl2 sc0 sc1 1336; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] sc1 1337; GFX940-NEXT: s_waitcnt vmcnt(0) 1338; GFX940-NEXT: buffer_inv sc0 sc1 1339; GFX940-NEXT: s_endpgm 1340main_body: 1341 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1342 ret void 1343} 1344 1345define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { 1346; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat: 1347; GFX90A: ; %bb.0: ; %main_body 1348; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1350; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1351; GFX90A-NEXT: buffer_wbl2 1352; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc 1353; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1354; GFX90A-NEXT: buffer_invl2 1355; GFX90A-NEXT: buffer_wbinvl1_vol 1356; GFX90A-NEXT: s_setpc_b64 s[30:31] 1357; 1358; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat: 1359; GFX940: ; %bb.0: ; %main_body 1360; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1361; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1362; GFX940-NEXT: buffer_wbl2 sc0 sc1 1363; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 1364; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1365; GFX940-NEXT: buffer_inv sc0 sc1 1366; GFX940-NEXT: s_setpc_b64 s[30:31] 1367main_body: 1368 %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1369 ret double %ret 1370} 1371 1372define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { 1373; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: 1374; GFX90A: ; %bb.0: ; %main_body 1375; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1376; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1377; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1378; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc 1379; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1380; GFX90A-NEXT: buffer_wbinvl1_vol 1381; GFX90A-NEXT: s_setpc_b64 s[30:31] 1382; 1383; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: 1384; GFX940: ; %bb.0: ; %main_body 1385; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1386; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1387; GFX940-NEXT: buffer_wbl2 sc1 1388; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 1389; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1390; GFX940-NEXT: buffer_inv sc1 1391; GFX940-NEXT: s_setpc_b64 s[30:31] 1392main_body: 1393 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1394 ret double %ret 1395} 1396 1397define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { 1398; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system: 1399; GFX90A: ; %bb.0: ; %main_body 1400; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1401; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1402; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1403; GFX90A-NEXT: buffer_wbl2 1404; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc 1405; GFX90A-NEXT: s_waitcnt vmcnt(0) 1406; GFX90A-NEXT: buffer_invl2 1407; GFX90A-NEXT: buffer_wbinvl1_vol 1408; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1409; GFX90A-NEXT: s_setpc_b64 s[30:31] 1410; 1411; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system: 1412; GFX940: ; %bb.0: ; %main_body 1413; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1414; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1415; GFX940-NEXT: buffer_wbl2 sc0 sc1 1416; GFX940-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1 1417; GFX940-NEXT: s_waitcnt vmcnt(0) 1418; GFX940-NEXT: buffer_inv sc0 sc1 1419; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1420; GFX940-NEXT: s_setpc_b64 s[30:31] 1421main_body: 1422 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 1423 ret double %ret 1424} 1425 1426define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { 1427; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: 1428; GFX90A: ; %bb.0: ; %main_body 1429; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 1430; GFX90A-NEXT: s_mov_b64 s[0:1], 0 1431; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1432; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] 1433; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1] 1434; GFX90A-NEXT: v_pk_mov_b32 v[4:5], s[2:3], s[2:3] op_sel:[0,1] 1435; GFX90A-NEXT: .LBB50_1: ; %atomicrmw.start 1436; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 1437; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1438; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0 1439; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc 1440; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1441; GFX90A-NEXT: buffer_wbinvl1_vol 1442; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3] 1443; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1] 1444; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] 1445; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1] 1446; GFX90A-NEXT: s_cbranch_execnz .LBB50_1 1447; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end 1448; GFX90A-NEXT: s_endpgm 1449; 1450; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: 1451; GFX940: ; %bb.0: ; %main_body 1452; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1453; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1454; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1455; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[0:1] 1456; GFX940-NEXT: buffer_wbl2 sc1 1457; GFX940-NEXT: flat_atomic_add_f64 v[2:3], v[0:1] 1458; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1459; GFX940-NEXT: buffer_inv sc1 1460; GFX940-NEXT: s_endpgm 1461main_body: 1462 %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1 1463 ret void 1464} 1465 1466define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) { 1467; GFX90A-LABEL: local_atomic_fadd_f64_noret: 1468; GFX90A: ; %bb.0: ; %main_body 1469; GFX90A-NEXT: s_load_dword s2, s[4:5], 0x24 1470; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c 1471; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1472; GFX90A-NEXT: v_mov_b32_e32 v2, s2 1473; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] 1474; GFX90A-NEXT: ds_add_f64 v2, v[0:1] 1475; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1476; GFX90A-NEXT: s_endpgm 1477; 1478; GFX940-LABEL: local_atomic_fadd_f64_noret: 1479; GFX940: ; %bb.0: ; %main_body 1480; GFX940-NEXT: s_load_dword s2, s[4:5], 0x24 1481; GFX940-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c 1482; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1483; GFX940-NEXT: v_mov_b32_e32 v2, s2 1484; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] 1485; GFX940-NEXT: ds_add_f64 v2, v[0:1] 1486; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1487; GFX940-NEXT: s_endpgm 1488main_body: 1489 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) 1490 ret void 1491} 1492 1493define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { 1494; GFX90A-LABEL: local_atomic_fadd_f64_rtn: 1495; GFX90A: ; %bb.0: ; %main_body 1496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1497; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1498; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1499; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1500; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1501; GFX90A-NEXT: s_setpc_b64 s[30:31] 1502; 1503; GFX940-LABEL: local_atomic_fadd_f64_rtn: 1504; GFX940: ; %bb.0: ; %main_body 1505; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1506; GFX940-NEXT: v_mov_b32_e32 v3, v2 1507; GFX940-NEXT: v_mov_b32_e32 v2, v1 1508; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1509; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1510; GFX940-NEXT: s_setpc_b64 s[30:31] 1511main_body: 1512 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) 1513 ret double %ret 1514} 1515 1516define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 { 1517; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: 1518; GFX90A: ; %bb.0: ; %main_body 1519; GFX90A-NEXT: s_load_dword s0, s[4:5], 0x24 1520; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1521; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1522; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1523; GFX90A-NEXT: v_mov_b32_e32 v2, s0 1524; GFX90A-NEXT: ds_add_f64 v2, v[0:1] 1525; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1526; GFX90A-NEXT: s_endpgm 1527; 1528; GFX940-LABEL: local_atomic_fadd_f64_noret_pat: 1529; GFX940: ; %bb.0: ; %main_body 1530; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 1531; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1532; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1533; GFX940-NEXT: v_mov_b32_e32 v2, s0 1534; GFX940-NEXT: ds_add_f64 v2, v[0:1] 1535; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1536; GFX940-NEXT: s_endpgm 1537main_body: 1538 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 1539 ret void 1540} 1541 1542define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 { 1543; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush: 1544; GFX90A: ; %bb.0: ; %main_body 1545; GFX90A-NEXT: s_load_dword s0, s[4:5], 0x24 1546; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1547; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1548; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1549; GFX90A-NEXT: v_mov_b32_e32 v2, s0 1550; GFX90A-NEXT: ds_add_f64 v2, v[0:1] 1551; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1552; GFX90A-NEXT: s_endpgm 1553; 1554; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush: 1555; GFX940: ; %bb.0: ; %main_body 1556; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 1557; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1558; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1559; GFX940-NEXT: v_mov_b32_e32 v2, s0 1560; GFX940-NEXT: ds_add_f64 v2, v[0:1] 1561; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1562; GFX940-NEXT: s_endpgm 1563main_body: 1564 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 1565 ret void 1566} 1567 1568define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 { 1569; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: 1570; GFX90A: ; %bb.0: ; %main_body 1571; GFX90A-NEXT: s_load_dword s0, s[4:5], 0x24 1572; GFX90A-NEXT: v_mov_b32_e32 v0, 0 1573; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 1574; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX90A-NEXT: v_mov_b32_e32 v2, s0 1576; GFX90A-NEXT: ds_add_f64 v2, v[0:1] 1577; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1578; GFX90A-NEXT: s_endpgm 1579; 1580; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: 1581; GFX940: ; %bb.0: ; %main_body 1582; GFX940-NEXT: s_load_dword s0, s[4:5], 0x24 1583; GFX940-NEXT: v_mov_b64_e32 v[0:1], 4.0 1584; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1585; GFX940-NEXT: v_mov_b32_e32 v2, s0 1586; GFX940-NEXT: ds_add_f64 v2, v[0:1] 1587; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1588; GFX940-NEXT: s_endpgm 1589main_body: 1590 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst 1591 ret void 1592} 1593 1594define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 { 1595; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat: 1596; GFX90A: ; %bb.0: ; %main_body 1597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1598; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1599; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000 1600; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1601; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1602; GFX90A-NEXT: s_setpc_b64 s[30:31] 1603; 1604; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat: 1605; GFX940: ; %bb.0: ; %main_body 1606; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1607; GFX940-NEXT: v_mov_b64_e32 v[2:3], 4.0 1608; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1609; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1610; GFX940-NEXT: s_setpc_b64 s[30:31] 1611main_body: 1612 %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0 1613 ret double %ret 1614} 1615 1616define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 { 1617; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: 1618; GFX90A: ; %bb.0: ; %main_body 1619; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1620; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1621; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1622; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1623; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1624; GFX90A-NEXT: s_setpc_b64 s[30:31] 1625; 1626; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: 1627; GFX940: ; %bb.0: ; %main_body 1628; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GFX940-NEXT: v_mov_b32_e32 v3, v2 1630; GFX940-NEXT: v_mov_b32_e32 v2, v1 1631; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1632; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1633; GFX940-NEXT: s_setpc_b64 s[30:31] 1634main_body: 1635 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) 1636 ret double %ret 1637} 1638 1639define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 { 1640; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: 1641; GFX90A: ; %bb.0: ; %main_body 1642; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1643; GFX90A-NEXT: v_mov_b32_e32 v3, v2 1644; GFX90A-NEXT: v_mov_b32_e32 v2, v1 1645; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1646; GFX90A-NEXT: s_waitcnt lgkmcnt(0) 1647; GFX90A-NEXT: s_setpc_b64 s[30:31] 1648; 1649; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: 1650; GFX940: ; %bb.0: ; %main_body 1651; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652; GFX940-NEXT: v_mov_b32_e32 v3, v2 1653; GFX940-NEXT: v_mov_b32_e32 v2, v1 1654; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[2:3] 1655; GFX940-NEXT: s_waitcnt lgkmcnt(0) 1656; GFX940-NEXT: s_setpc_b64 s[30:31] 1657main_body: 1658 %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) 1659 ret double %ret 1660} 1661 1662attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } 1663attributes #1 = { nounwind } 1664attributes #2 = { "denormal-fp-math"="ieee,ieee" } 1665attributes #3 = { "denormal-fp-math"="ieee,ieee" } 1666attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" } 1667 1668!0 = !{} 1669!1 = !{i32 5, i32 6} 1670