# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefixes=GCN,GFX67 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer %s -o - | FileCheck -check-prefixes=GCN,GFX67 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -run-pass=legalizer %s -o - | FileCheck -check-prefixes=GCN,GFX12 %s

---
name: s_buffer_load_s32
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GCN-LABEL: name: s_buffer_load_s32
    ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32))
    ; GCN-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...

---
name: s_buffer_load_v3s32
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX67-LABEL: name: s_buffer_load_v3s32
    ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: {{  $}}
    ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
    ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
    ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    ;
    ; GFX12-LABEL: name: s_buffer_load_v3s32
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...
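
# The <3 x p3> and <6 x s16> cases below legalize like <3 x s32>: GFX6/7 widen
# the load to <4 x s32> and rebuild the low three elements, GFX12 loads
# <3 x s32> directly; both then bitcast to the requested result type.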

---
name: s_buffer_load_v3p3
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX67-LABEL: name: s_buffer_load_v3p3
    ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: {{  $}}
    ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
    ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
    ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
    ; GFX67-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>)
    ;
    ; GFX12-LABEL: name: s_buffer_load_v3p3
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
    ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...

---
name: s_buffer_load_v6s16
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX67-LABEL: name: s_buffer_load_v6s16
    ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: {{  $}}
    ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
    ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
    ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
    ; GFX67-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>)
    ;
    ; GFX12-LABEL: name: s_buffer_load_v6s16
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
    ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...
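
# 192-bit results (<6 x s32>, <3 x s64>) are widened to a 256-bit load on all
# targets; the unused tail elements are dropped with G_UNMERGE_VALUES and
# G_BUILD_VECTOR.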

---
name: s_buffer_load_v6s32
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GCN-LABEL: name: s_buffer_load_v6s32
    ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
    ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
    ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...

---
name: s_buffer_load_v3s64
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GCN-LABEL: name: s_buffer_load_v3s64
    ; GCN: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: {{  $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>)
    ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64)
    ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...
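
# The <12 x s8> result (any-extended to <12 x s16> by the test) reuses the
# 96-bit load legalization above and unpacks each byte with shifts: GFX6/7
# repack pairs through s32 masks and <2 x s16> bitcasts, while GFX12 truncates
# to s16 and builds <2 x s16> vectors directly.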

---
name: s_buffer_load_v12s8
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX67-LABEL: name: s_buffer_load_v12s8
    ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: {{  $}}
    ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
    ; GFX67-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
    ; GFX67-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
    ; GFX67-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; GFX67-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
    ; GFX67-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
    ; GFX67-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
    ; GFX67-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
    ; GFX67-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
    ; GFX67-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
    ; GFX67-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
    ; GFX67-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
    ; GFX67-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
    ; GFX67-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
    ; GFX67-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]]
    ; GFX67-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]]
    ; GFX67-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
    ; GFX67-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
    ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; GFX67-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C2]](s32)
    ; GFX67-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
    ; GFX67-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
    ; GFX67-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]]
    ; GFX67-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]]
    ; GFX67-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32)
    ; GFX67-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
    ; GFX67-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
    ; GFX67-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR5]], [[C2]](s32)
    ; GFX67-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[LSHR4]], [[SHL3]]
    ; GFX67-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
    ; GFX67-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]]
    ; GFX67-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
    ; GFX67-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32)
    ; GFX67-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
    ; GFX67-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
    ; GFX67-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR8]], [[C2]](s32)
    ; GFX67-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[SHL5]]
    ; GFX67-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32)
    ; GFX67-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
    ; GFX67-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
    ;
    ; GFX12-LABEL: name: s_buffer_load_v12s8
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
    ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
    ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
    ; GFX12-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; GFX12-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32)
    ; GFX12-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
    ; GFX12-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32)
    ; GFX12-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
    ; GFX12-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
    ; GFX12-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
    ; GFX12-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32)
    ; GFX12-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32)
    ; GFX12-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32)
    ; GFX12-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32)
    ; GFX12-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
    ; GFX12-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32)
    ; GFX12-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32)
    ; GFX12-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32)
    ; GFX12-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32)
    ; GFX12-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32)
    ; GFX12-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32)
    ; GFX12-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
    ; GFX12-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32)
    ; GFX12-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32)
    ; GFX12-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32)
    ; GFX12-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
    ; GFX12-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
    ; GFX12-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16)
    ; GFX12-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16)
    ; GFX12-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[TRUNC9]](s16)
    ; GFX12-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC10]](s16), [[TRUNC11]](s16)
    ; GFX12-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>)
    ; GFX12-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    %3:_(<12 x s16>) = G_ANYEXT %2
    S_ENDPGM 0, implicit %3

...
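
# s_buffer_load_s96 mirrors s_buffer_load_v3s32: GFX6/7 round the 96-bit load
# up to <4 x s32> and extract the low three elements, GFX12 emits a native
# <3 x s32> load.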

---
name: s_buffer_load_s96
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX67-LABEL: name: s_buffer_load_s96
    ; GFX67: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: {{  $}}
    ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
    ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
    ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
    ;
    ; GFX12-LABEL: name: s_buffer_load_s96
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
    ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0
    S_ENDPGM 0, implicit %2

...