; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 | FileCheck %s -check-prefix=GFX12-SDAG
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -global-isel-abort=2 | FileCheck %s -check-prefix=GFX12-GISEL

; Packed 16-bit floating-point buffer atomic add on gfx1200.
;
; Every function below is compiled twice, once with -global-isel=0 and once
; with -global-isel=1, and both invocations are expected to emit the same
; instruction sequence. The tests cover:
;   * raw and struct buffer addressing,
;   * <2 x half> (buffer_atomic_pk_add_f16) and <2 x bfloat>
;     (buffer_atomic_pk_add_bf16) payloads,
;   * the no-return form (result unused) and the returning form, which is
;     distinguished in the expected output by th:TH_ATOMIC_RETURN.

; Intrinsic declarations. The trailing i32 operand is marked immarg on all four
; declarations so that the raw and struct variants agree with each other; every
; call in this file passes the constant 0 for it.
declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32 immarg)

; Raw buffer, <2 x half>, constant offset 92 passed in place of %voffset (which
; is declared but not referenced). The result is unused, so the expected
; instruction has the "off" address form with offset:92 and no return modifier.
define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret_offset:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92
; GFX12-GISEL-NEXT:    s_endpgm
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
  ret void
}

; Raw buffer, <2 x half>, variable %voffset (offen form). Result unused.
define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_noret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT:    s_endpgm
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Raw buffer, <2 x half>, constant offset 92. The result is returned from the
; shader, so the returning form followed by a load-counter wait is expected.
define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret_offset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret_offset:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s4 offset:92 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 92, i32 %soffset, i32 0)
  ret <2 x half> %ret
}

; Raw buffer, <2 x half>, variable %voffset (offen form). Result is returned.
define amdgpu_ps <2 x half> @raw_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2f16_ret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %ret
}

; Struct buffer, <2 x half>, with both %vindex and %voffset (idxen offen form,
; address in v[1:2]). The result is bitcast to float and returned.
define amdgpu_ps float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_ret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_ret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %r = bitcast <2 x half> %orig to float
  ret float %r
}

; Struct buffer, <2 x half>, idxen offen form. Result unused.
define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2f16_noret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2f16_noret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_f16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT:    s_endpgm
  %orig = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Struct buffer, <2 x bfloat>, idxen offen form. The atomic result is kept live
; by storing it through a null pointer; the function itself returns the
; constant 1.0.
define amdgpu_ps float @struct_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b32 v[1:2], v0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT:    s_wait_dscnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_ret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b32 v[1:2], v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT:    s_wait_dscnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  store <2 x bfloat> %orig, ptr null
  ret float 1.0
}

; Struct buffer, <2 x bfloat>, idxen offen form. Result unused.
define amdgpu_ps void @struct_buffer_atomic_add_v2bf16_noret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: struct_buffer_atomic_add_v2bf16_noret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v[1:2], s[0:3], s4 idxen offen
; GFX12-GISEL-NEXT:    s_endpgm
  %orig = call <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Raw buffer, <2 x bfloat>, variable %voffset (offen form). Result unused.
define amdgpu_ps void @raw_buffer_atomic_add_v2bf16(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen
; GFX12-GISEL-NEXT:    s_endpgm
  %ret = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Raw buffer, <2 x bfloat>, offen form. As in the struct bf16 returning test,
; the atomic result is stored through a null pointer to keep it live and the
; function returns the constant 1.0.
define amdgpu_ps float @raw_buffer_atomic_add_v2bf16_ret(<2 x bfloat> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; GFX12-SDAG-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b32 v[1:2], v0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT:    s_wait_dscnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: raw_buffer_atomic_add_v2bf16_ret:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b32 v[1:2], v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT:    s_wait_dscnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
  %orig = call <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  store <2 x bfloat> %orig, ptr null
  ret float 1.0
}