; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: Test with SI when argument lowering not broken for f16

; Natural mapping
define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Copies for VGPR arguments
define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(ptr addrspace(8) inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for rsrc
define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(ptr addrspace(8) %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for soffset
define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]]
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[COPY7]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Waterfall for rsrc and soffset
define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(ptr addrspace(8) %rsrc, float %val, i32 %voffset, i32 %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 1)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 3)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 4)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 6)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 5)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 7)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_volatile(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_volatile
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (volatile dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 -2147483648)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(ptr addrspace(8) inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(ptr addrspace(8) inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(ptr addrspace(8) inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(ptr addrspace(8) inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8) into %ir.rsrc, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %val.trunc = trunc i32 %val to i8
  call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %val.trunc, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(ptr addrspace(8) inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %val.trunc = trunc i32 %val to i16
  call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %val.trunc, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(ptr addrspace(8) inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(ptr addrspace(8) inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(ptr addrspace(8) %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE2]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(ptr addrspace(8) inreg %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(ptr addrspace(8) inreg %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 4096, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(ptr addrspace(8) inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4095, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 4096, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(ptr addrspace(8) inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(ptr addrspace(8) %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE1]], [[COPY6]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  %voffset.add = add i32 %voffset, 5000
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(ptr addrspace(8) %rsrc, float %val, i32 inreg %soffset) {
  ; CHECK-LABEL: name: raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT:   liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; CHECK-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; CHECK-NEXT:   [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; CHECK-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec
  ; CHECK-NEXT:   [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec
  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY5]], 904, 0, 0, implicit $exec :: (dereferenceable store (s32) into %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT:   $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT:   SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   S_ENDPGM 0
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 5000, i32 %soffset, i32 0)
  ret void
}

declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32 immarg)

declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg)

declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg)
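
; Reader's note (an editorial addition, not autogenerated): the divergent-operand
; cases above all check the same waterfall-loop shape. BUFFER_STORE requires the
; rsrc and soffset operands to be uniform (SGPR), so when either arrives in VGPRs
; the selector emits a loop: V_READFIRSTLANE_B32 reads one lane's candidate value,
; V_CMP_EQ_U32/U64 finds every lane holding that same value, S_AND_SAVEEXEC_B64
; restricts exec to those lanes while the store executes, and SI_WATERFALL_LOOP
; branches back to bb.2 until all lanes are serviced; the exec mask saved by the
; initial S_MOV_B64 is then restored with S_MOV_B64_term. The *_add_5000 tests
; additionally check that the V_ADD_CO_U32_e64 computing the offset is hoisted
; outside this loop.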