; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s

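; The tests below exercise instruction selection for the
; llvm.amdgcn.struct.buffer.load.* intrinsics. "Natural mapping" means the
; operands already arrive in the register banks the selected BUFFER_LOAD_*
; instruction expects (SGPR rsrc/soffset, VGPR vindex/voffset), so no
; cross-bank copies or readfirstlane legalization are needed.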
; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

; Natural mapping
define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; GFX12-LABEL: name: struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub0
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub1
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX12-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x float> %val
}

; Natural mapping
define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  ;
  ; GFX12-LABEL: name: struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub0
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub1
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_VBUFFER_BOTHEN]].sub2
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX12-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX12-NEXT: $vgpr2 = COPY [[COPY9]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
  %val = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <3 x float> %val
}

; Natural mapping
define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub3
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]]
  ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  ;
  ; GFX12-LABEL: name: struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub0
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub1
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub2
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_VBUFFER_BOTHEN]].sub3
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX12-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX12-NEXT: $vgpr2 = COPY [[COPY9]]
  ; GFX12-NEXT: $vgpr3 = COPY [[COPY10]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
  %val = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x float> %val
}

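; A constant vindex of 0 still occupies the VGPR index operand; note the
; S_MOV_B32 0 plus a copy to a VGPR feeding the index half of the address
; REG_SEQUENCE below.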
; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

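; An add of 4095 (the largest value that fits the immediate offset field of
; these buffer instructions) to voffset folds into the instruction's
; immediate offset operand rather than costing a VALU add; see the 4095
; immediate below.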
; Natural mapping
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %voffset = add i32 %voffset.base, 4095
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

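; A constant soffset of 64 is materialized into an SGPR with S_MOV_B32 and
; used directly as the scalar offset operand of the load.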
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_soffset_64(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_soffset_64
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_soffset_64
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0)
  ret float %val
}

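; When the resource descriptor and soffset arrive in VGPRs they are
; potentially divergent, but the hardware reads them as scalars. Selection
; therefore builds a waterfall loop: V_READFIRSTLANE_B32 picks one lane's
; values, V_CMP/S_AND compute the mask of lanes that match, S_AND_SAVEEXEC
; restricts exec to those lanes for the load, and SI_WATERFALL_LOOP repeats
; until every lane has been serviced.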
; Need to legalize all reg operands
define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY11]], [[COPY9]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY12]], [[COPY10]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret float %val
}

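; Sub-dword loads: an i8 load selects BUFFER_LOAD_UBYTE (zext) or
; BUFFER_LOAD_SBYTE (sext), so the extension in the IR folds into the load
; opcode and the MMO is (s8).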
define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = zext i8 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_SBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SBYTE_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = sext i8 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

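; sext from a type narrower than the load (i4 here) cannot fold into
; BUFFER_LOAD_SBYTE; instead the ubyte load is followed by V_BFE_I32 with
; width 4 to sign-extend the low bits.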
define amdgpu_ps float @struct_buffer_load_i8_sext_wrong_width(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i8_sext_wrong_width
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX8-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 4, implicit $exec
  ; GFX8-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i8_sext_wrong_width
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s8), addrspace 8)
  ; GFX12-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_VBUFFER_BOTHEN]], 0, 4, implicit $exec
  ; GFX12-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %trunc = trunc i8 %val to i4
  %ext = sext i4 %trunc to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = zext i16 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_SSHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_SSHORT_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %ext = sext i16 %val to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

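; Same pattern as the i8 wrong-width case: sext from i8 out of an i16 load
; keeps BUFFER_LOAD_USHORT and sign-extends with V_BFE_I32 of width 8.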
define amdgpu_ps float @struct_buffer_load_i16_sext_wrong_width(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_i16_sext_wrong_width
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX8-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 8, implicit $exec
  ; GFX8-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_i16_sext_wrong_width
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX12-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]], 0, 8, implicit $exec
  ; GFX12-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  %trunc = trunc i16 %val to i8
  %ext = sext i8 %trunc to i32
  %cast = bitcast i32 %ext to float
  ret float %cast
}

; Natural mapping
define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (s16), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret half %val
}

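; A <2 x half> result fits in one dword, so it selects the same
; BUFFER_LOAD_DWORD as an f32 load; only the MMO type (<2 x s16>) differs.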
; Natural mapping
define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<2 x s16>), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %val
}

; FIXME: Crashes
; define amdgpu_ps <3 x half> @struct_buffer_load_v3f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
;   %val = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
;   ret <3 x half> %val
; }

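; Likewise <4 x half> occupies 64 bits and selects BUFFER_LOAD_DWORDX2,
; returned as two dword halves in $vgpr0/$vgpr1.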
; Natural mapping
define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ;
  ; GFX12-LABEL: name: struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, implicit $exec :: (dereferenceable load (<4 x s16>), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub0
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_VBUFFER_BOTHEN]].sub1
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY7]]
  ; GFX12-NEXT: $vgpr1 = COPY [[COPY8]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret <4 x half> %val
}

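; cachepolicy bit 0 (glc) shows up as the second immediate after the offset
; operand (the "0, 1, 0" below), assuming the usual offset/cachepolicy/swz
; operand ordering for these pseudos.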
; Natural mapping + glc
define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_glc
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_VBUFFER_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_VBUFFER_BOTHEN]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
  ret float %val
}

declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0

attributes #0 = { nounwind readonly }