; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=GFX12 %s

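; Natural mapping: an SGPR rsrc and soffset with a VGPR value and voffset select the OFFEN form directly (the VBUFFER encoding on GFX12).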
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

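; A constant voffset of 4095 fits in the immediate offset field, so the OFFSET form (no VGPR offset) is selected with the constant folded in.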
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_X_VBUFFER_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

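; A <2 x float> value is packed into a 64-bit VGPR tuple and stored with the XY variant.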
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

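; A <3 x float> value is packed into a 96-bit VGPR tuple and stored with the XYZ variant.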
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZ_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

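; A <4 x float> value is packed into a 128-bit VGPR tuple and stored with the XYZW variant.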
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

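; A divergent (VGPR) rsrc needs a waterfall loop: each half of the descriptor is read back with V_READFIRSTLANE, exec is masked to the lanes that match, and the store repeats until every lane is done.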
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

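; An immediate soffset of 4095 must be materialized into an SGPR with S_MOV_B32.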
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
  ret void
}

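; An immediate soffset of 4096 is likewise materialized; soffset only accepts a register, never the immediate offset field.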
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
  ret void
}

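; Adding 16 to the voffset folds into the instruction's immediate offset field.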
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  %voffset.add = add i32 %voffset, 16
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

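; Adding 4095 still folds into the immediate offset field.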
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4095
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

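; Adding 4096 overflows GFX8's 12-bit offset field, so GFX8 computes the voffset with V_ADD_CO_U32; GFX12's wider offset field folds it directly.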
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XY_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: BUFFER_STORE_FORMAT_XYZW_VBUFFER_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4096, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: S_ENDPGM 0
  %voffset.add = add i32 %voffset, 4096
  call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)