1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 2# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX10 3# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s -check-prefixes=CHECK,GFX12 4 5--- 6name: merge_s_buffer_load_x2 7tracksRegLiveness: true 8body: | 9 bb.0: 10 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 11 12 ; GFX10-LABEL: name: merge_s_buffer_load_x2 13 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 14 ; GFX10-NEXT: {{ $}} 15 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 16 ; GFX10-NEXT: early-clobber %3:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4) 17 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY %3.sub0 18 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed %3.sub1 19 ; GFX10-NEXT: S_ENDPGM 0 20 ; 21 ; GFX12-LABEL: name: merge_s_buffer_load_x2 22 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 23 ; GFX12-NEXT: {{ $}} 24 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 25 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64), align 4) 26 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[S_BUFFER_LOAD_DWORDX2_IMM]].sub0 27 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX2_IMM]].sub1 28 ; GFX12-NEXT: S_ENDPGM 0 29 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 30 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 31 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) 32 33 S_ENDPGM 0 34... 
# Summary of merge_s_buffer_load_x2 (the test above): the input has two S_BUFFER_LOAD_DWORD_IMM loads from the same descriptor at immediate offsets 0 and 4. The GFX10 checks expect one early-clobber S_BUFFER_LOAD_DWORDX2_IMM_ec; the GFX12 checks expect one S_BUFFER_LOAD_DWORDX2_IMM. In both cases sub0/sub1 COPYs follow.
# Tests in this span (all load from the same descriptor with immediate offsets):
#   merge_s_buffer_load_x1_x2: dword load at offset 0, then X2 load at offset 4. The shared CHECK lines expect the two loads to remain separate.
#   merge_s_buffer_load_x2_x1: X2 load at offset 0, then dword load at offset 8. The GFX10 checks expect the loads to remain separate; the GFX12 checks expect one S_BUFFER_LOAD_DWORDX3_IMM followed by sub0_sub1 and sub2 COPYs.
#   merge_s_buffer_load_x4: four dword loads at offsets 0, 4, 8, 12. The GFX10 checks expect S_BUFFER_LOAD_DWORDX4_IMM_ec and the GFX12 checks S_BUFFER_LOAD_DWORDX4_IMM, each followed by COPYs that extract the individual dwords.
#   merge_s_buffer_load_x1_x3: dword load at offset 0, then X3 load at offset 4. The shared CHECK lines expect the two loads to remain separate.
35--- 36 37name: merge_s_buffer_load_x1_x2 38tracksRegLiveness: true 39body: | 40 bb.0: 41 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 42 43 ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2 44 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 45 ; CHECK-NEXT: {{ $}} 46 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 47 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) 48 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s64)) 49 ; CHECK-NEXT: S_ENDPGM 0 50 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 51 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 52 %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64)) 53 54 S_ENDPGM 0 55... 56--- 57 58name: merge_s_buffer_load_x2_x1 59tracksRegLiveness: true 60body: | 61 bb.0: 62 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 63 64 ; GFX10-LABEL: name: merge_s_buffer_load_x2_x1 65 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 66 ; GFX10-NEXT: {{ $}} 67 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 68 ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s64)) 69 ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32)) 70 ; GFX10-NEXT: S_ENDPGM 0 71 ; 72 ; GFX12-LABEL: name: merge_s_buffer_load_x2_x1 73 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 74 ; GFX12-NEXT: {{ $}} 75 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 76 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8) 77 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY 
[[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1 78 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2 79 ; GFX12-NEXT: S_ENDPGM 0 80 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 81 %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) 82 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) 83 84 S_ENDPGM 0 85... 86--- 87 88name: merge_s_buffer_load_x4 89tracksRegLiveness: true 90body: | 91 bb.0: 92 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 93 94 ; GFX10-LABEL: name: merge_s_buffer_load_x4 95 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 96 ; GFX10-NEXT: {{ $}} 97 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 98 ; GFX10-NEXT: early-clobber %7:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4) 99 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY %7.sub0_sub1 100 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY killed %7.sub2_sub3 101 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY1]].sub0 102 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub1 103 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0 104 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1 105 ; GFX10-NEXT: S_ENDPGM 0 106 ; 107 ; GFX12-LABEL: name: merge_s_buffer_load_x4 108 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 109 ; GFX12-NEXT: {{ $}} 110 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 111 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128), align 4) 112 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2 113 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3 114 ; GFX12-NEXT: 
[[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 115 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub2 116 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 117 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 118 ; GFX12-NEXT: S_ENDPGM 0 119 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 120 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 121 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) 122 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) 123 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) 124 125 S_ENDPGM 0 126... 127--- 128 129name: merge_s_buffer_load_x1_x3 130tracksRegLiveness: true 131body: | 132 bb.0: 133 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 134 135 ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3 136 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 137 ; CHECK-NEXT: {{ $}} 138 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 139 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) 140 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16) 141 ; CHECK-NEXT: S_ENDPGM 0 142 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 143 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 144 %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96)) 145 146 S_ENDPGM 0 147... 
# merge_s_buffer_load_x3_x1: X3 load at offset 0, then dword load at offset 12 from the same descriptor. The shared CHECK lines (both run lines) expect a single S_BUFFER_LOAD_DWORDX4_IMM followed by sub0_sub1_sub2 and sub3 COPYs.
148--- 149 150name: merge_s_buffer_load_x3_x1 151tracksRegLiveness: true 152body: | 153 bb.0: 154 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 155 156 ; CHECK-LABEL: name: merge_s_buffer_load_x3_x1 157 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 158 ; CHECK-NEXT: {{ $}} 159 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 160 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128)) 161 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2 162 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3 163 ; CHECK-NEXT: S_ENDPGM 0 164 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 165 %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96)) 166 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) 167 168 S_ENDPGM 0 169... 
# Tests in this span. In each of the first five, the GFX10 checks expect one early-clobber S_BUFFER_LOAD_DWORDX8_IMM_ec and the GFX12 checks one S_BUFFER_LOAD_DWORDX8_IMM, followed by COPYs that split the result:
#   merge_s_buffer_load_x8: eight dword loads at offsets 0, 4, ..., 28 in ascending order.
#   merge_s_buffer_load_x8_reordered: the same eight offsets, issued in the order 20, 4, 0, 28, 12, 16, 8, 24.
#   merge_s_buffer_load_x8_out_of_x2: four X2 loads at offsets 16, 8, 0, 24.
#   merge_s_buffer_load_x8_out_of_x4: two X4 loads at offsets 0 and 16.
#   merge_s_buffer_load_x8_mixed: X4 load at offset 0, dword at 16, X2 at 24, dword at 20.
#   merge_s_buffer_load_sgpr_imm: four S_BUFFER_LOAD_DWORD_SGPR_IMM loads sharing one soffset register, at immediate offsets 0, 4, 8, 12. The GFX10 checks expect S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec and the GFX12 checks S_BUFFER_LOAD_DWORDX4_SGPR_IMM.
170--- 171 172name: merge_s_buffer_load_x8 173tracksRegLiveness: true 174body: | 175 bb.0: 176 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 177 178 ; GFX10-LABEL: name: merge_s_buffer_load_x8 179 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 180 ; GFX10-NEXT: {{ $}} 181 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 182 ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) 183 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub0_sub1_sub2_sub3 184 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub4_sub5_sub6_sub7 185 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 186 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3 187 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 188 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 189 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0 190 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1 191 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 192 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3 193 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY9]].sub0 194 ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub1 195 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY10]].sub0 196 ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY10]].sub1 197 ; GFX10-NEXT: S_ENDPGM 0 198 ; 199 ; GFX12-LABEL: name: merge_s_buffer_load_x8 200 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 201 ; GFX12-NEXT: {{ $}} 202 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 203 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) 204 ; 
GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 205 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 206 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 207 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3 208 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1 209 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2 210 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub0 211 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub1 212 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2 213 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3 214 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1 215 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2 216 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub0 217 ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub1 218 ; GFX12-NEXT: S_ENDPGM 0 219 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 220 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 221 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) 222 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) 223 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32)) 224 %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) 225 %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) 226 %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: 
(dereferenceable invariant load (s32)) 227 %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) 228 229 S_ENDPGM 0 230... 231--- 232 233name: merge_s_buffer_load_x8_reordered 234tracksRegLiveness: true 235body: | 236 bb.0: 237 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 238 239 ; GFX10-LABEL: name: merge_s_buffer_load_x8_reordered 240 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 241 ; GFX10-NEXT: {{ $}} 242 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 243 ; GFX10-NEXT: early-clobber %15:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) 244 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %15.sub4_sub5_sub6_sub7 245 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %15.sub0_sub1_sub2_sub3 246 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY1]].sub0_sub1 247 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY1]].sub2_sub3 248 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub1 249 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub0 250 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 251 ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_64_xexec = COPY killed [[COPY2]].sub2_sub3 252 ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY7]].sub1 253 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY7]].sub0 254 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub1 255 ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub0 256 ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY8]].sub1 257 ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY8]].sub0 258 ; GFX10-NEXT: S_ENDPGM 0 259 ; 260 ; GFX12-LABEL: name: merge_s_buffer_load_x8_reordered 261 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 262 ; GFX12-NEXT: {{ $}} 263 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 
= COPY $sgpr0_sgpr1_sgpr2_sgpr3 264 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 4) 265 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 266 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 267 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 268 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY1]].sub3 269 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]].sub0_sub1 270 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub2 271 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY5]].sub1 272 ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY5]].sub0 273 ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sgpr_96 = COPY [[COPY2]].sub0_sub1_sub2 274 ; GFX12-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub3 275 ; GFX12-NEXT: [[COPY11:%[0-9]+]]:sreg_64_xexec = COPY [[COPY9]].sub0_sub1 276 ; GFX12-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY9]].sub2 277 ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY11]].sub1 278 ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY11]].sub0 279 ; GFX12-NEXT: S_ENDPGM 0 280 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 281 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) 282 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s32)) 283 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 284 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 28, 0 :: (dereferenceable invariant load (s32)) 285 %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load 
(s32)) 286 %6:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) 287 %7:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) 288 %8:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s32)) 289 290 S_ENDPGM 0 291... 292--- 293 294name: merge_s_buffer_load_x8_out_of_x2 295tracksRegLiveness: true 296body: | 297 bb.0: 298 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 299 300 ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2 301 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 302 ; GFX10-NEXT: {{ $}} 303 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 304 ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) 305 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7 306 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3 307 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 308 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 309 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 310 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 311 ; GFX10-NEXT: S_ENDPGM 0 312 ; 313 ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2 314 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 315 ; GFX12-NEXT: {{ $}} 316 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 317 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8) 318 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 319 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 320 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1 
321 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3 322 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3 323 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1 324 ; GFX12-NEXT: S_ENDPGM 0 325 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 326 %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64)) 327 %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64)) 328 %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) 329 %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) 330 331 S_ENDPGM 0 332... 333--- 334 335name: merge_s_buffer_load_x8_out_of_x4 336tracksRegLiveness: true 337body: | 338 bb.0: 339 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 340 341 ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4 342 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 343 ; GFX10-NEXT: {{ $}} 344 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 345 ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) 346 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3 347 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7 348 ; GFX10-NEXT: S_ENDPGM 0 349 ; 350 ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4 351 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 352 ; GFX12-NEXT: {{ $}} 353 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 354 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) 355 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 356 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed 
[[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 357 ; GFX12-NEXT: S_ENDPGM 0 358 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 359 %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) 360 %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128)) 361 362 S_ENDPGM 0 363... 364--- 365 366name: merge_s_buffer_load_x8_mixed 367tracksRegLiveness: true 368body: | 369 bb.0: 370 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 371 372 ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed 373 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 374 ; GFX10-NEXT: {{ $}} 375 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 376 ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) 377 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3 378 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7 379 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 380 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 381 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 382 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 383 ; GFX10-NEXT: S_ENDPGM 0 384 ; 385 ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed 386 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 387 ; GFX12-NEXT: {{ $}} 388 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 389 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16) 390 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3 391 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7 392 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY 
[[COPY2]].sub0_sub1 393 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3 394 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 395 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 396 ; GFX12-NEXT: S_ENDPGM 0 397 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 398 %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128)) 399 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32)) 400 %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64)) 401 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32)) 402 403 S_ENDPGM 0 404... 405--- 406 407name: merge_s_buffer_load_sgpr_imm 408tracksRegLiveness: true 409body: | 410 bb.0: 411 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 412 413 ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm 414 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 415 ; GFX10-NEXT: {{ $}} 416 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 417 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 418 ; GFX10-NEXT: early-clobber %8:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4) 419 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8.sub0_sub1 420 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY killed %8.sub2_sub3 421 ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY2]].sub0 422 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub1 423 ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0 424 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1 425 ; GFX10-NEXT: S_ENDPGM 0 426 ; 427 ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm 428 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 429 ; GFX12-NEXT: {{ $}} 
430 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 431 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 432 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 4) 433 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1_sub2 434 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub3 435 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1 436 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY2]].sub2 437 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY4]].sub0 438 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY4]].sub1 439 ; GFX12-NEXT: S_ENDPGM 0 440 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 441 %1:sreg_32 = COPY $sgpr4 442 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s32)) 443 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) 444 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) 445 %5:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) 446 447 S_ENDPGM 0 448... 
# no_merge_for_different_soffsets: two S_BUFFER_LOAD_DWORD_SGPR_IMM loads from the same descriptor at immediate offsets 4 and 8, but with different soffset registers (copies of $sgpr4 and $sgpr5). The shared CHECK lines expect both loads to remain separate.
449--- 450 451name: no_merge_for_different_soffsets 452tracksRegLiveness: true 453body: | 454 bb.0: 455 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 456 457 ; CHECK-LABEL: name: no_merge_for_different_soffsets 458 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 459 ; CHECK-NEXT: {{ $}} 460 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 461 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 462 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr5 463 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32)) 464 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY2]], 8, 0 :: (dereferenceable invariant load (s32)) 465 ; CHECK-NEXT: S_ENDPGM 0 466 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 467 %1:sreg_32 = COPY $sgpr4 468 %2:sreg_32 = COPY $sgpr5 469 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) 470 %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %2:sreg_32, 8, 0 :: (dereferenceable invariant load (s32)) 471 472 S_ENDPGM 0 473... 
# no_merge_for_non_adjacent_offsets: two S_BUFFER_LOAD_DWORD_SGPR_IMM loads with the same descriptor and the same soffset register, at immediate offsets 4 and 12 (so offset 8 is not loaded). The shared CHECK lines expect both loads to remain separate.
474--- 475 476name: no_merge_for_non_adjacent_offsets 477tracksRegLiveness: true 478body: | 479 bb.0: 480 liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 481 482 ; CHECK-LABEL: name: no_merge_for_non_adjacent_offsets 483 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 484 ; CHECK-NEXT: {{ $}} 485 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 486 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4 487 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 4, 0 :: (dereferenceable invariant load (s32)) 488 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[COPY]], [[COPY1]], 12, 0 :: (dereferenceable invariant load (s32)) 489 ; CHECK-NEXT: S_ENDPGM 0 490 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 491 %1:sreg_32 = COPY $sgpr4 492 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 4, 0 :: (dereferenceable invariant load (s32)) 493 %3:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %0:sgpr_128, %1:sreg_32, 12, 0 :: (dereferenceable invariant load (s32)) 494 495 S_ENDPGM 0 496... 497 498# The constrained multi-dword buffer load merge tests. 
# Tests in this span use early-clobber "_ec" multi-dword loads as input:
#   merge_s_buffer_load_x1_x2ec: dword load at offset 0, then S_BUFFER_LOAD_DWORDX2_IMM_ec at offset 4. The shared CHECK lines expect the two loads to remain separate.
#   merge_s_buffer_load_x2ec_x1: S_BUFFER_LOAD_DWORDX2_IMM_ec at offset 0, then dword load at offset 8. The GFX10 checks expect the loads to remain separate; the GFX12 checks expect one S_BUFFER_LOAD_DWORDX3_IMM followed by sub0_sub1 and sub2 COPYs.
#   merge_s_buffer_load_x1_x3ec: dword load at offset 0, then S_BUFFER_LOAD_DWORDX3_IMM_ec at offset 4. The shared CHECK lines expect the two loads to remain separate.
499 500--- 501name: merge_s_buffer_load_x1_x2ec 502tracksRegLiveness: true 503body: | 504 bb.0: 505 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 506 507 ; CHECK-LABEL: name: merge_s_buffer_load_x1_x2ec 508 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 509 ; CHECK-NEXT: {{ $}} 510 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 511 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) 512 ; CHECK-NEXT: early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s64)) 513 ; CHECK-NEXT: S_ENDPGM 0 514 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 515 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 516 early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s64)) 517 518 S_ENDPGM 0 519... 520--- 521 522name: merge_s_buffer_load_x2ec_x1 523tracksRegLiveness: true 524body: | 525 bb.0: 526 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 527 528 ; GFX10-LABEL: name: merge_s_buffer_load_x2ec_x1 529 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 530 ; GFX10-NEXT: {{ $}} 531 ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 532 ; GFX10-NEXT: early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s64)) 533 ; GFX10-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 8, 0 :: (dereferenceable invariant load (s32)) 534 ; GFX10-NEXT: S_ENDPGM 0 535 ; 536 ; GFX12-LABEL: name: merge_s_buffer_load_x2ec_x1 537 ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 538 ; GFX12-NEXT: {{ $}} 539 ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 540 ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX3_IMM:%[0-9]+]]:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s96), align 8) 541 ; GFX12-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX3_IMM]].sub0_sub1 542 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX3_IMM]].sub2 543 ; GFX12-NEXT: S_ENDPGM 0 544 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 545 early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64)) 546 %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s32)) 547 548 S_ENDPGM 0 549... 550--- 551 552name: merge_s_buffer_load_x1_x3ec 553tracksRegLiveness: true 554body: | 555 bb.0: 556 liveins: $sgpr0_sgpr1_sgpr2_sgpr3 557 558 ; CHECK-LABEL: name: merge_s_buffer_load_x1_x3ec 559 ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3 560 ; CHECK-NEXT: {{ $}} 561 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 562 ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s32)) 563 ; CHECK-NEXT: early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec [[COPY]], 4, 0 :: (dereferenceable invariant load (s96), align 16) 564 ; CHECK-NEXT: S_ENDPGM 0 565 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 566 %1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32)) 567 early-clobber %2:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 4, 0 :: (dereferenceable invariant load (s96)) 568 569 S_ENDPGM 0 570... 
# Input: an early-clobber DWORDX3 (_ec) load at offset 0 followed by a DWORD
# load at offset 12. Both RUN lines (shared CHECK prefix) expect a single plain
# (non-_ec) S_BUFFER_LOAD_DWORDX4_IMM at offset 0, with the original results
# recovered through sub0_sub1_sub2 and sub3 subregister copies.
---

name: merge_s_buffer_load_x3ec_x1
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; CHECK-LABEL: name: merge_s_buffer_load_x3ec_x1
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_96 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1_sub2
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub3
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_96 = S_BUFFER_LOAD_DWORDX3_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s96))
    %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 12, 0 :: (dereferenceable invariant load (s32))

    S_ENDPGM 0
...
# Input: four early-clobber DWORDX2 (_ec) loads issued out of address order
# (offsets 16, 8, 0, 24). Expected: one 8-dword load at offset 0 with align 8,
# split back into the four 64-bit results by two levels of subregister copies.
# GFX10 (gfx1010 RUN line) expects the early-clobber S_BUFFER_LOAD_DWORDX8_IMM_ec;
# GFX12 (gfx1200 RUN line) expects the plain S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_out_of_x2ec_reordered
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_reordered
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
    early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
    early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
    early-clobber %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))

    S_ENDPGM 0
...
# Input: a mix of two early-clobber DWORDX2 (_ec) loads (offsets 16 and 8) and
# two plain DWORDX2 loads (offsets 0 and 24). Expected: one 8-dword load at
# offset 0 with align 8 followed by two levels of subregister copies.
# GFX10 expects S_BUFFER_LOAD_DWORDX8_IMM_ec; GFX12 expects the plain
# S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_out_of_x2ec_x2
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x2ec_x2
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 8)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY [[COPY1]].sub0_sub1
    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY1]].sub2_sub3
    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY [[COPY2]].sub2_sub3
    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub0_sub1
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s64))
    early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))
    %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64))
    %4:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))

    S_ENDPGM 0
...
# Input: two early-clobber DWORDX4 (_ec) loads at offsets 0 and 16.
# Expected: one 8-dword load at offset 0 with align 16, split back with
# sub0..sub3 and sub4..sub7 copies. GFX10 expects S_BUFFER_LOAD_DWORDX8_IMM_ec;
# GFX12 expects the plain S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_out_of_x4ec
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
    early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))

    S_ENDPGM 0
...
# Input: an early-clobber DWORDX4 (_ec) load at offset 0 followed by a plain
# DWORDX4 load at offset 16. Expected: one 8-dword load at offset 0 with
# align 16 plus two 128-bit subregister copies. GFX10 expects
# S_BUFFER_LOAD_DWORDX8_IMM_ec; GFX12 expects the plain S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_out_of_x4ec_x4
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4ec_x4
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
    %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))

    S_ENDPGM 0
...
# Input: a plain DWORDX4 load at offset 0 followed by an early-clobber DWORDX4
# (_ec) load at offset 16 (the opposite opcode order to the _x4ec_x4 case).
# Expected: one 8-dword load at offset 0 with align 16 plus two 128-bit
# subregister copies. GFX10 expects S_BUFFER_LOAD_DWORDX8_IMM_ec; GFX12 expects
# the plain S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_out_of_x4_x4ec
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %3:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %3.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %3.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_out_of_x4_x4ec
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
    early-clobber %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))

    S_ENDPGM 0
...
# Input: loads of mixed width and opcode kind covering bytes 0..31:
# DWORDX4_ec at offset 0, DWORD at 16, DWORDX2_ec at 24 and DWORD at 20.
# Expected: one 8-dword load at offset 0 with align 16, then a chain of
# subregister copies that peels off the 128-bit, 64-bit and 32-bit pieces.
# GFX10 expects S_BUFFER_LOAD_DWORDX8_IMM_ec; GFX12 expects the plain
# S_BUFFER_LOAD_DWORDX8_IMM.
---

name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; GFX10-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: early-clobber %7:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM_ec [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY %7.sub0_sub1_sub2_sub3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed %7.sub4_sub5_sub6_sub7
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
    ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_x8_mixed_including_ec_opcodes
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256), align 16)
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[COPY2]].sub0_sub1
    ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY killed [[COPY2]].sub2_sub3
    ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY3]].sub0
    ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec = COPY killed [[COPY3]].sub1
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    early-clobber %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM_ec %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128))
    %2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
    early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM_ec %0:sgpr_128, 24, 0 :: (dereferenceable invariant load (s64))
    %4:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0:sgpr_128, 20, 0 :: (dereferenceable invariant load (s32))

    S_ENDPGM 0
...
# Input: two early-clobber DWORDX2 SGPR_IMM (_ec) loads that share the same
# resource and SGPR offset operand (%1) and use immediate offsets 0 and 8.
# Expected: one 4-dword SGPR_IMM load at immediate offset 0 with align 8 plus
# sub0_sub1 / sub2_sub3 copies. GFX10 expects
# S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec; GFX12 expects the plain
# S_BUFFER_LOAD_DWORDX4_SGPR_IMM.
---

name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4

    ; GFX10-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
    ; GFX10-NEXT: early-clobber %4:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY %4.sub0_sub1
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed %4.sub2_sub3
    ; GFX10-NEXT: S_ENDPGM 0
    ;
    ; GFX12-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
    ; GFX12-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128), align 8)
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
    ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:sreg_32 = COPY $sgpr4
    early-clobber %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64))
    early-clobber %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))

    S_ENDPGM 0
...

# No constrained opcode required when the MEM operand has met the required alignment.

# Input: two plain DWORDX2 loads at offsets 0 and 8, where the first memory
# operand carries an explicit "align 16". Both RUN lines (shared CHECK prefix)
# expect a plain S_BUFFER_LOAD_DWORDX4_IMM (no _ec variant) plus sub0_sub1 /
# sub2_sub3 copies.
---

name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; CHECK-LABEL: name: merge_s_buffer_load_x2_x2_no_constrained_opc_needed
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s128))
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_IMM]].sub0_sub1
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_IMM]].sub2_sub3
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s64), align 16)
    %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_IMM %0:sgpr_128, 8, 0 :: (dereferenceable invariant load (s64))

    S_ENDPGM 0
...
# Input: two plain DWORDX4 loads at offsets 0 and 16, where the first memory
# operand carries an explicit "align 32". Both RUN lines (shared CHECK prefix)
# expect a plain S_BUFFER_LOAD_DWORDX8_IMM (no _ec variant) plus two 128-bit
# subregister copies.
---

name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3

    ; CHECK-LABEL: name: merge_s_buffer_load_x4_x4_no_constrained_opc_needed
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (dereferenceable invariant load (s256))
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_128 = COPY [[S_BUFFER_LOAD_DWORDX8_IMM]].sub0_sub1_sub2_sub3
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_128 = COPY killed [[S_BUFFER_LOAD_DWORDX8_IMM]].sub4_sub5_sub6_sub7
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 0, 0 :: (dereferenceable invariant load (s128), align 32)
    %2:sgpr_128 = S_BUFFER_LOAD_DWORDX4_IMM %0:sgpr_128, 16, 0 :: (dereferenceable invariant load (s128))

    S_ENDPGM 0
...
# Input: two DWORDX2 SGPR_IMM loads sharing the same resource and SGPR offset
# operand (%1), with immediate offsets 0 and 8; the first memory operand
# carries an explicit "align 16". Both RUN lines (shared CHECK prefix) expect
# a plain S_BUFFER_LOAD_DWORDX4_SGPR_IMM (no _ec variant) plus sub0_sub1 /
# sub2_sub3 copies.
# NOTE(review): despite "x2ec_x2ec" in the test name, the input instructions
# below are the plain (non-_ec) S_BUFFER_LOAD_DWORDX2_SGPR_IMM opcodes --
# confirm whether that is intentional.
---

name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4

    ; CHECK-LABEL: name: merge_s_buffer_load_sgpr_imm_x2ec_x2ec_no_constrained_opc_needed
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
    ; CHECK-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR_IMM [[COPY]], [[COPY1]], 0, 0 :: (dereferenceable invariant load (s128))
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub0_sub1
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY killed [[S_BUFFER_LOAD_DWORDX4_SGPR_IMM]].sub2_sub3
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:sreg_32 = COPY $sgpr4
    %2:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 0, 0 :: (dereferenceable invariant load (s64), align 16)
    %3:sgpr_64 = S_BUFFER_LOAD_DWORDX2_SGPR_IMM %0:sgpr_128, %1:sreg_32, 8, 0 :: (dereferenceable invariant load (s64))

    S_ENDPGM 0
...