1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s 4 5; Natural mapping 6define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 7 ; GFX8-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 8 ; GFX8: bb.1 (%ir-block.0): 9 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 10 ; GFX8-NEXT: {{ $}} 11 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 12 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 13 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 14 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 15 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 16 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 17 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 18 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 19 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 20 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 21 ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 22 ; GFX8-NEXT: S_ENDPGM 0 23 ; 24 ; GFX12-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 25 ; GFX12: bb.1 (%ir-block.0): 26 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 27 ; GFX12-NEXT: {{ $}} 28 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 29 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 30 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 31 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 32 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 33 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 34 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 35 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 36 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 37 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 38 ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 39 ; GFX12-NEXT: S_ENDPGM 0 40 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 41 ret void 42} 43 44define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<2 x float> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 45 ; GFX8-LABEL: name: struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 46 ; GFX8: bb.1 (%ir-block.0): 47 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 48 ; GFX8-NEXT: {{ $}} 49 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 50 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 51 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 52 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 53 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 54 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 55 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 56 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 57 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 58 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 59 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 60 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 61 ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) 62 ; GFX8-NEXT: S_ENDPGM 0 63 ; 64 ; GFX12-LABEL: name: struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 65 ; GFX12: bb.1 (%ir-block.0): 66 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 67 ; GFX12-NEXT: {{ $}} 68 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 69 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 70 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 71 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 72 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 73 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 74 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 75 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 76 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 77 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 78 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 79 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 80 ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), align 1, addrspace 8) 81 ; GFX12-NEXT: S_ENDPGM 0 82 call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 83 ret void 84} 85 86define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<3 x float> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 87 ; GFX8-LABEL: name: struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 88 ; GFX8: bb.1 (%ir-block.0): 89 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 90 ; GFX8-NEXT: {{ $}} 91 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 92 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 93 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 94 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 95 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 96 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 97 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 98 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 99 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 100 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 101 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 102 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 103 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 104 ; GFX8-NEXT: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) 105 ; GFX8-NEXT: S_ENDPGM 0 106 ; 107 ; GFX12-LABEL: name: struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 108 ; GFX12: bb.1 (%ir-block.0): 109 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 110 ; GFX12-NEXT: {{ $}} 111 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 112 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 113 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 114 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2 115 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 116 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 117 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 118 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr5 119 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 120 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 121 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 122 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 123 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 124 ; GFX12-NEXT: BUFFER_STORE_DWORDX3_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, implicit $exec :: (dereferenceable store (<3 x s32>), align 1, addrspace 8) 125 ; GFX12-NEXT: S_ENDPGM 0 126 call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 127 ret void 128} 129 130define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x float> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 131 ; GFX8-LABEL: name: struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 132 ; GFX8: bb.1 (%ir-block.0): 133 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 134 ; GFX8-NEXT: {{ $}} 135 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 136 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 137 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 138 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 139 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 140 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 141 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 142 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 143 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 144 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 145 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 146 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 147 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 148 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 149 ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) 150 ; GFX8-NEXT: S_ENDPGM 0 151 ; 152 ; GFX12-LABEL: name: struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 153 ; GFX12: bb.1 (%ir-block.0): 154 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 155 ; GFX12-NEXT: {{ $}} 156 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 157 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 158 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 159 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 160 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 161 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 162 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 163 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 164 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 165 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 166 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 167 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5 168 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6 169 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 170 ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) 171 ; GFX12-NEXT: S_ENDPGM 0 172 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 173 ret void 174} 175 176define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__vgpr_soffset(<4 x float> inreg %val, <4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) { 177 ; GFX8-LABEL: name: struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__vgpr_soffset 178 ; GFX8: bb.1 (%ir-block.0): 179 ; GFX8-NEXT: successors: %bb.2(0x80000000) 180 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 181 ; GFX8-NEXT: {{ $}} 182 ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 183 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 184 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 185 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 186 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 187 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 188 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 189 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 190 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 191 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 192 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 193 ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 194 ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 195 ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] 196 ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 197 ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 198 ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 199 ; GFX8-NEXT: {{ $}} 200 ; GFX8-NEXT: bb.2: 201 ; GFX8-NEXT: successors: %bb.3(0x80000000) 202 ; GFX8-NEXT: {{ $}} 203 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 204 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 205 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 206 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 207 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 208 ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 209 ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 210 ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 211 ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 212 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec 213 ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec 214 ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 215 ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 216 ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec 217 ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 218 ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 219 ; GFX8-NEXT: {{ $}} 220 ; GFX8-NEXT: bb.3: 221 ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 222 ; GFX8-NEXT: {{ $}} 223 ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 224 ; GFX8-NEXT: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) 225 ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 226 ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 227 ; GFX8-NEXT: {{ $}} 228 ; GFX8-NEXT: bb.4: 229 ; GFX8-NEXT: successors: %bb.5(0x80000000) 230 ; GFX8-NEXT: {{ $}} 231 ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] 232 ; GFX8-NEXT: {{ $}} 233 ; GFX8-NEXT: bb.5: 234 ; GFX8-NEXT: S_ENDPGM 0 235 ; 236 ; GFX12-LABEL: name: struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__vgpr_soffset 237 ; GFX12: bb.1 (%ir-block.0): 238 ; GFX12-NEXT: successors: %bb.2(0x80000000) 239 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 240 ; GFX12-NEXT: {{ $}} 241 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 242 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 243 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 244 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 245 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 246 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 247 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 248 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 249 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 250 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 251 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 252 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr7 253 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr4 254 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE]] 255 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 256 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] 257 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo 258 ; GFX12-NEXT: {{ $}} 259 ; GFX12-NEXT: bb.2: 260 ; GFX12-NEXT: successors: %bb.3(0x80000000) 261 ; GFX12-NEXT: {{ $}} 262 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 263 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 264 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 265 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 266 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 267 ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 268 ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 269 ; GFX12-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 270 ; GFX12-NEXT: [[COPY17:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 271 ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec 272 ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY17]], [[COPY15]], implicit $exec 273 ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 274 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 275 ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec 276 ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 277 ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec 278 ; GFX12-NEXT: {{ $}} 279 ; GFX12-NEXT: bb.3: 280 ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 281 ; GFX12-NEXT: {{ $}} 282 ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1 283 ; GFX12-NEXT: BUFFER_STORE_DWORDX4_VBUFFER_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s32>), align 1, addrspace 8) 284 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc 285 ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 286 ; GFX12-NEXT: {{ $}} 287 ; GFX12-NEXT: bb.4: 288 ; GFX12-NEXT: successors: %bb.5(0x80000000) 289 ; GFX12-NEXT: {{ $}} 290 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] 291 ; GFX12-NEXT: {{ $}} 292 ; GFX12-NEXT: bb.5: 293 ; GFX12-NEXT: S_ENDPGM 0 294 call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 295 ret void 296} 297 298define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 299 ; GFX8-LABEL: name: struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 300 ; GFX8: bb.1 (%ir-block.0): 301 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 302 ; GFX8-NEXT: {{ $}} 303 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 304 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 305 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 306 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 307 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 308 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 309 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 310 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 311 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 312 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 313 ; GFX8-NEXT: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) 314 ; GFX8-NEXT: S_ENDPGM 0 315 ; 316 ; GFX12-LABEL: name: struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 317 ; GFX12: bb.1 (%ir-block.0): 318 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 319 ; GFX12-NEXT: {{ $}} 320 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 321 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 322 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 323 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 324 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 325 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 326 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 327 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 328 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 329 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 330 ; GFX12-NEXT: BUFFER_STORE_BYTE_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s8), addrspace 8) 331 ; GFX12-NEXT: S_ENDPGM 0 332 %val.trunc = trunc i32 %val to i8 333 call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 334 ret void 335} 336 337define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(i32 %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 338 ; GFX8-LABEL: name: struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 339 ; GFX8: bb.1 (%ir-block.0): 340 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 341 ; GFX8-NEXT: {{ $}} 342 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 343 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 344 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 345 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 346 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 347 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 348 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 349 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 350 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 351 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 352 ; GFX8-NEXT: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) 353 ; GFX8-NEXT: S_ENDPGM 0 354 ; 355 ; GFX12-LABEL: name: struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 356 ; GFX12: bb.1 (%ir-block.0): 357 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 358 ; GFX12-NEXT: {{ $}} 359 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 360 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 361 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 362 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 363 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 364 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 365 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 366 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 367 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 368 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 369 ; GFX12-NEXT: BUFFER_STORE_SHORT_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (s16), align 1, addrspace 8) 370 ; GFX12-NEXT: S_ENDPGM 0 371 %val.trunc = trunc i32 %val to i16 372 call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 373 ret void 374} 375 376define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 377 ; GFX8-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc 378 ; GFX8: bb.1 (%ir-block.0): 379 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 380 ; GFX8-NEXT: {{ $}} 381 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 382 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 383 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 384 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 385 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 386 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 387 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 388 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 389 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 390 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 391 ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 392 ; GFX8-NEXT: S_ENDPGM 0 393 ; 394 ; GFX12-LABEL: name: struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc 395 ; GFX12: bb.1 (%ir-block.0): 396 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 397 ; GFX12-NEXT: {{ $}} 398 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 399 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 400 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 401 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 402 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 403 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 404 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 405 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 406 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 407 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 408 ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8) 409 ; GFX12-NEXT: S_ENDPGM 0 410 call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1) 411 ret void 412} 413 414define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 415 ; GFX8-LABEL: name: struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 416 ; GFX8: bb.1 (%ir-block.0): 417 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 418 ; GFX8-NEXT: {{ $}} 419 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 420 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 421 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 422 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 423 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 424 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 425 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 426 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 427 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 428 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 429 ; GFX8-NEXT: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) 430 ; GFX8-NEXT: S_ENDPGM 0 431 ; 432 ; GFX12-LABEL: name: struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 433 ; GFX12: bb.1 (%ir-block.0): 434 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 435 ; GFX12-NEXT: {{ $}} 436 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 437 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 438 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 439 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 440 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 441 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 442 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 443 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 444 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 445 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 446 ; GFX12-NEXT: BUFFER_STORE_DWORD_VBUFFER_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s16>), align 1, addrspace 8) 447 ; GFX12-NEXT: S_ENDPGM 0 448 call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 449 ret void 450} 451 452; FIXME: 453; define amdgpu_ps void @struct_buffer_store_v3f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<3 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 454; call void @llvm.amdgcn.struct.buffer.store.v3f16(<3 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 455; ret void 456; } 457 458define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 459 ; GFX8-LABEL: name: struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 460 ; GFX8: bb.1 (%ir-block.0): 461 ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 462 ; GFX8-NEXT: {{ $}} 463 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 464 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 465 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 466 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 467 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 468 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 469 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 470 ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 471 ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 472 ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 473 ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 474 ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 475 ; GFX8-NEXT: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) 476 ; GFX8-NEXT: S_ENDPGM 0 477 ; 478 ; GFX12-LABEL: name: struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset 479 ; GFX12: bb.1 (%ir-block.0): 480 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3 481 ; GFX12-NEXT: {{ $}} 482 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 483 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 484 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 485 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 486 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 487 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 488 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 489 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 490 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 491 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 492 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 493 ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 494 ; GFX12-NEXT: BUFFER_STORE_DWORDX2_VBUFFER_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, implicit $exec :: (dereferenceable store (<4 x s16>), align 1, addrspace 8) 495 ; GFX12-NEXT: S_ENDPGM 0 496 call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 497 ret void 498} 499 500declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32 immarg) 501declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32 immarg) 502 503declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32 immarg) 504declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) 505declare void @llvm.amdgcn.struct.buffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32 immarg) 506declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32 immarg) 507 508declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) 509declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32 immarg) 510declare void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32 immarg) 511declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) 512