1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s 3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10_GFX11 %s 4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX12 %s 5 6define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 7 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 8 ; GFX10_GFX11: bb.1 (%ir-block.0): 9 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 10 ; GFX10_GFX11-NEXT: {{ $}} 11 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 12 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 13 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 14 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 15 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 16 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 17 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 18 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 19 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 20 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 21 ; 22 ; GFX12-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 23 ; GFX12: bb.1 (%ir-block.0): 24 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 25 ; GFX12-NEXT: {{ $}} 26 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 27 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 28 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 29 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 30 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 31 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 32 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 33 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 34 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 35 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 36 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 37 ret float %val 38} 39 40define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 41 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 42 ; GFX10_GFX11: bb.1 (%ir-block.0): 43 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 44 ; GFX10_GFX11-NEXT: {{ $}} 45 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 46 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 47 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 48 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 49 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 50 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 51 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 52 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) 53 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0 54 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1 55 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[COPY6]] 56 ; GFX10_GFX11-NEXT: $vgpr1 = COPY [[COPY7]] 57 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 58 ; 59 ; GFX12-LABEL: name: raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 60 ; GFX12: bb.1 (%ir-block.0): 61 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 62 ; GFX12-NEXT: {{ $}} 63 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 64 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 65 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 66 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 67 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 68 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 69 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 70 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<2 x s32>), align 1, addrspace 8) 71 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub0 72 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_VBUFFER_OFFEN]].sub1 73 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] 74 ; GFX12-NEXT: $vgpr1 = COPY [[COPY7]] 75 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 76 %val = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 77 ret <2 x float> %val 78} 79 80define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 81 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 82 ; GFX10_GFX11: bb.1 (%ir-block.0): 83 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 84 ; GFX10_GFX11-NEXT: {{ $}} 85 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 86 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 87 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 88 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 89 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 90 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 91 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 92 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) 93 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0 94 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1 95 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2 96 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[COPY6]] 97 ; GFX10_GFX11-NEXT: $vgpr1 = COPY [[COPY7]] 98 ; GFX10_GFX11-NEXT: $vgpr2 = COPY [[COPY8]] 99 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 100 ; 101 ; GFX12-LABEL: name: raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 102 ; GFX12: bb.1 (%ir-block.0): 103 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 104 ; GFX12-NEXT: {{ $}} 105 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 106 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 107 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 108 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 109 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 110 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 111 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 112 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<3 x s32>), align 1, addrspace 8) 113 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub0 114 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub1 115 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_VBUFFER_OFFEN]].sub2 116 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] 117 ; GFX12-NEXT: $vgpr1 = COPY [[COPY7]] 118 ; GFX12-NEXT: $vgpr2 = COPY [[COPY8]] 119 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 120 %val = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 121 ret <3 x float> %val 122} 123 124define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 125 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 126 ; GFX10_GFX11: bb.1 (%ir-block.0): 127 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 128 ; GFX10_GFX11-NEXT: {{ $}} 129 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 130 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 131 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 132 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 133 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 134 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 135 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 136 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) 137 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0 138 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1 139 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2 140 ; GFX10_GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub3 141 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[COPY6]] 142 ; GFX10_GFX11-NEXT: $vgpr1 = COPY [[COPY7]] 143 ; GFX10_GFX11-NEXT: $vgpr2 = COPY [[COPY8]] 144 ; GFX10_GFX11-NEXT: $vgpr3 = COPY [[COPY9]] 145 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 146 ; 147 ; GFX12-LABEL: name: raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset 148 ; GFX12: bb.1 (%ir-block.0): 149 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 150 ; GFX12-NEXT: {{ $}} 151 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 152 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 153 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 154 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 155 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 156 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 157 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 158 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) 159 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub0 160 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub1 161 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub2 162 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_VBUFFER_OFFEN]].sub3 163 ; GFX12-NEXT: $vgpr0 = COPY [[COPY6]] 164 ; GFX12-NEXT: $vgpr1 = COPY [[COPY7]] 165 ; GFX12-NEXT: $vgpr2 = COPY [[COPY8]] 166 ; GFX12-NEXT: $vgpr3 = COPY [[COPY9]] 167 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 168 %val = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 169 ret <4 x float> %val 170} 171 172define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) { 173 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset 174 ; GFX10_GFX11: bb.1 (%ir-block.0): 175 ; GFX10_GFX11-NEXT: successors: %bb.2(0x80000000) 176 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 177 ; GFX10_GFX11-NEXT: {{ $}} 178 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 179 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 180 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 181 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 182 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 183 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 184 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 185 ; GFX10_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 186 ; GFX10_GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo 187 ; GFX10_GFX11-NEXT: {{ $}} 188 ; GFX10_GFX11-NEXT: bb.2: 189 ; GFX10_GFX11-NEXT: successors: %bb.3(0x80000000) 190 ; GFX10_GFX11-NEXT: {{ $}} 191 ; GFX10_GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 192 ; GFX10_GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec 193 ; GFX10_GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec 194 ; GFX10_GFX11-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec 195 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 196 ; GFX10_GFX11-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 197 ; GFX10_GFX11-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 198 ; GFX10_GFX11-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 199 ; GFX10_GFX11-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 200 ; GFX10_GFX11-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec 201 ; GFX10_GFX11-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec 202 ; GFX10_GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 203 ; GFX10_GFX11-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 204 ; GFX10_GFX11-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec 205 ; GFX10_GFX11-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 206 ; GFX10_GFX11-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec 207 ; GFX10_GFX11-NEXT: {{ $}} 208 ; GFX10_GFX11-NEXT: bb.3: 209 ; GFX10_GFX11-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 210 ; GFX10_GFX11-NEXT: {{ $}} 211 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 212 ; GFX10_GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc 213 ; GFX10_GFX11-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 214 ; GFX10_GFX11-NEXT: {{ $}} 215 ; GFX10_GFX11-NEXT: bb.4: 216 ; GFX10_GFX11-NEXT: successors: %bb.5(0x80000000) 217 ; GFX10_GFX11-NEXT: {{ $}} 218 ; GFX10_GFX11-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] 219 ; GFX10_GFX11-NEXT: {{ $}} 220 ; GFX10_GFX11-NEXT: bb.5: 221 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 222 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 223 ; 224 ; GFX12-LABEL: name: raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset 225 ; GFX12: bb.1 (%ir-block.0): 226 ; GFX12-NEXT: successors: %bb.2(0x80000000) 227 ; GFX12-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 228 ; GFX12-NEXT: {{ $}} 229 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 230 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 231 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 232 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 233 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 234 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 235 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4 236 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] 237 ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo 238 ; GFX12-NEXT: {{ $}} 239 ; GFX12-NEXT: bb.2: 240 ; GFX12-NEXT: successors: %bb.3(0x80000000) 241 ; GFX12-NEXT: {{ $}} 242 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec 243 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY1]], implicit $exec 244 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec 245 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec 246 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 247 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 248 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 249 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 250 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 251 ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY9]], [[COPY7]], implicit $exec 252 ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY10]], [[COPY8]], implicit $exec 253 ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 254 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 255 ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec 256 ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 257 ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec 258 ; GFX12-NEXT: {{ $}} 259 ; GFX12-NEXT: bb.3: 260 ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 261 ; GFX12-NEXT: {{ $}} 262 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 263 ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc 264 ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 265 ; GFX12-NEXT: {{ $}} 266 ; GFX12-NEXT: bb.4: 267 ; GFX12-NEXT: successors: %bb.5(0x80000000) 268 ; GFX12-NEXT: {{ $}} 269 ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]] 270 ; GFX12-NEXT: {{ $}} 271 ; GFX12-NEXT: bb.5: 272 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 273 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 274 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0) 275 ret float %val 276} 277 278define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 279 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 280 ; GFX10_GFX11: bb.1 (%ir-block.0): 281 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 282 ; GFX10_GFX11-NEXT: {{ $}} 283 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 284 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 285 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 286 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 287 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 288 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 289 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 290 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 291 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 292 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 293 ; 294 ; GFX12-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc 295 ; GFX12: bb.1 (%ir-block.0): 296 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 297 ; GFX12-NEXT: {{ $}} 298 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 299 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 300 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 301 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 302 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 303 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 304 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 305 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 306 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 307 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 308 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1) 309 ret float %val 310} 311 312define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 313 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 314 ; GFX10_GFX11: bb.1 (%ir-block.0): 315 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 316 ; GFX10_GFX11-NEXT: {{ $}} 317 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 318 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 319 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 320 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 321 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 322 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 323 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 324 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 325 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 326 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 327 ; 328 ; GFX12-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc 329 ; GFX12: bb.1 (%ir-block.0): 330 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 331 ; GFX12-NEXT: {{ $}} 332 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 333 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 334 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 335 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 336 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 337 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 338 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 339 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 340 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 341 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 342 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2) 343 ret float %val 344} 345 346define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 347 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 348 ; GFX10_GFX11: bb.1 (%ir-block.0): 349 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 350 ; GFX10_GFX11-NEXT: {{ $}} 351 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 352 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 353 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 354 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 355 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 356 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 357 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 358 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 359 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 360 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 361 ; 362 ; GFX12-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc 363 ; GFX12: bb.1 (%ir-block.0): 364 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 365 ; GFX12-NEXT: {{ $}} 366 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 367 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 368 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 369 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 370 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 371 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 372 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 373 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 374 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 375 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 376 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3) 377 ret float %val 378} 379 380define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 381 ; GFX10_GFX11-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 382 ; GFX10_GFX11: bb.1 (%ir-block.0): 383 ; GFX10_GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 384 ; GFX10_GFX11-NEXT: {{ $}} 385 ; GFX10_GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 386 ; GFX10_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 387 ; GFX10_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 388 ; GFX10_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 389 ; GFX10_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 390 ; GFX10_GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 391 ; GFX10_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 392 ; GFX10_GFX11-NEXT: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 393 ; GFX10_GFX11-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]] 394 ; GFX10_GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 395 ; 396 ; GFX12-LABEL: name: raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc 397 ; GFX12: bb.1 (%ir-block.0): 398 ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 399 ; GFX12-NEXT: {{ $}} 400 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 401 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 402 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 403 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 404 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 405 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 406 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 407 ; GFX12-NEXT: [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8) 408 ; GFX12-NEXT: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_VBUFFER_OFFEN]] 409 ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 410 %val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4) 411 ret float %val 412} 413 414declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 415declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 416declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 417declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0 418 419attributes #0 = { nounwind readonly } 420